From: Philip Homburg Date: Thu, 11 May 2006 14:57:23 +0000 (+0000) Subject: New interface between PM and FS. X-Git-Tag: v3.1.3~364 X-Git-Url: http://zhaoyanbai.com/repos/%22http:/www.isc.org/icons/doc/processing.js?a=commitdiff_plain;h=773844a816d548c6f89d6634beec3ff334319815;p=minix.git New interface between PM and FS. --- diff --git a/servers/fs/Makefile b/servers/fs/Makefile index d331d3f8f..1e60f7d5f 100644 --- a/servers/fs/Makefile +++ b/servers/fs/Makefile @@ -14,7 +14,7 @@ LDFLAGS = -i LIBS = -lsys -lsysutil -ltimers OBJ = main.o open.o read.o write.o pipe.o dmap.o \ - device.o path.o mount.o link.o super.o inode.o \ + device.o exec.o path.o mount.o link.o super.o inode.o \ cache.o cache2.o filedes.o stadir.o protect.o time.o \ lock.o misc.o utility.o select.o timers.o table.o diff --git a/servers/fs/cache.c b/servers/fs/cache.c index dfc4b6735..7189931fa 100644 --- a/servers/fs/cache.c +++ b/servers/fs/cache.c @@ -58,6 +58,7 @@ int only_search; /* if NO_READ, don't read, else act normal */ if (dev != NO_DEV) { b = (int) block & HASH_MASK; bp = buf_hash[b]; + while (bp != NIL_BUF) { if (bp->b_blocknr == block && bp->b_dev == dev) { /* Block needed has been found. */ @@ -84,12 +85,14 @@ int only_search; /* if NO_READ, don't read, else act normal */ } else { /* The block just taken is not on the front of its hash chain. */ while (prev_ptr->b_hash != NIL_BUF) + { if (prev_ptr->b_hash == bp) { prev_ptr->b_hash = bp->b_hash; /* found it */ break; } else { prev_ptr = prev_ptr->b_hash; /* keep looking */ } + } } /* If the block taken is dirty, make it clean by writing it to the disk. @@ -269,7 +272,7 @@ int rw_flag; /* READING or WRITING */ if ( (dev = bp->b_dev) != NO_DEV) { pos = (off_t) bp->b_blocknr * block_size; op = (rw_flag == READING ? 
DEV_READ : DEV_WRITE); - r = dev_io(op, dev, FS_PROC_NR, bp->b_data, pos, block_size, 0); + r = dev_bio(op, dev, FS_PROC_NR, bp->b_data, pos, block_size, 0); if (r != block_size) { if (r >= 0) r = END_OF_FILE; if (r != END_OF_FILE) @@ -371,7 +374,7 @@ int rw_flag; /* READING or WRITING */ iop->iov_addr = (vir_bytes) bp->b_data; iop->iov_size = block_size; } - r = dev_io(rw_flag == WRITING ? DEV_SCATTER : DEV_GATHER, + r = dev_bio(rw_flag == WRITING ? DEV_SCATTER : DEV_GATHER, dev, FS_PROC_NR, iovec, (off_t) bufq[0]->b_blocknr * block_size, j, 0); @@ -433,12 +436,123 @@ struct buf *bp; next_ptr = bp->b_next; /* successor on LRU chain */ prev_ptr = bp->b_prev; /* predecessor on LRU chain */ if (prev_ptr != NIL_BUF) + { prev_ptr->b_next = next_ptr; + } else front = next_ptr; /* this block was at front of chain */ if (next_ptr != NIL_BUF) + { next_ptr->b_prev = prev_ptr; + } else rear = prev_ptr; /* this block was at rear of chain */ } + +#if 0 +PRIVATE void check_lru() +{ + int i; + struct buf *bp, *nbp; + + for (i= 0; ib_next; + if (nbp != NULL && (nbp < buf || nbp >= &buf[NR_BUFS])) + { + stacktrace(); + panic(__FILE__, "check_lru: bad next", nbp); + } + nbp= bp->b_prev; + if (nbp != NULL && (nbp < buf || nbp >= &buf[NR_BUFS])) + { + stacktrace(); + panic(__FILE__, "check_lru: bad next", nbp); + } + } +} + +PRIVATE void check_buf(bp) +struct buf *bp; +{ + struct buf *nbp; + + if (bp < buf || bp >= &buf[NR_BUFS]) + { + stacktrace(); + panic(__FILE__, "check_buf: bad buf", bp); + } + nbp= bp->b_next; + if (nbp != NULL && (nbp < buf || nbp >= &buf[NR_BUFS])) + { + stacktrace(); + panic(__FILE__, "check_buf: bad next", nbp); + } + nbp= bp->b_prev; + if (nbp != NULL && (nbp < buf || nbp >= &buf[NR_BUFS])) + { + stacktrace(); + panic(__FILE__, "check_buf: bad next", nbp); + } +} + +PRIVATE void check_hash_chains() +{ + int i; + struct buf *bp; + + for (i= 0; i= &buf[NR_BUFS]) + { + panic(__FILE__, "check_hash_chains: bad buf", + bp); + } + bp= bp->b_hash; + } + } +} + 
+PUBLIC void check_hash_chainsX(file, line) +char *file; +int line; +{ + int i; + struct buf *bp; + + for (i= 0; i= &buf[NR_BUFS]) + { + printf( + "check_hash_chainsX: called from %s, %d\n", + file, line); + panic(__FILE__, "check_hash_chainsX: bad buf", + bp); + } + bp= bp->b_hash; + } + } +} + +PRIVATE void check_hash_chain(bp) +struct buf *bp; +{ + while (bp) + { + if (bp < buf || bp >= &buf[NR_BUFS]) + { + panic(__FILE__, "check_hash_chain: bad buf", bp); + } + bp= bp->b_hash; + } +} +#endif diff --git a/servers/fs/device.c b/servers/fs/device.c index 831713ed4..9aeb73bab 100644 --- a/servers/fs/device.c +++ b/servers/fs/device.c @@ -14,7 +14,7 @@ * ctty_opcl: perform controlling-tty-specific processing for open/close * ctty_io: perform controlling-tty-specific processing for I/O * do_ioctl: perform the IOCTL system call - * do_setsid: perform the SETSID system call (FS side) + * pm_setsid: perform the SETSID system call (FS side) */ #include "fs.h" @@ -117,6 +117,91 @@ PUBLIC void dev_status(message *m) return; } +/*===========================================================================* + * dev_bio * + *===========================================================================*/ +PUBLIC int dev_bio(op, dev, proc_e, buf, pos, bytes, flags) +int op; /* DEV_READ, DEV_WRITE, DEV_IOCTL, etc. */ +dev_t dev; /* major-minor device number */ +int proc_e; /* in whose address space is buf? */ +void *buf; /* virtual address of the buffer */ +off_t pos; /* byte position */ +int bytes; /* how many bytes to transfer */ +int flags; /* special flags, like O_NONBLOCK */ +{ +/* Read or write from a device. The parameter 'dev' tells which one. */ + struct dmap *dp; + int r; + message m; + + /* Determine task dmap. */ + dp = &dmap[(dev >> MAJOR) & BYTE]; + + for (;;) + { + /* See if driver is roughly valid. */ + if (dp->dmap_driver == NONE) { + printf("FS: dev_io: no driver for dev %x\n", dev); + return ENXIO; + } + + /* Set up the message passed to task. 
*/ + m.m_type = op; + m.DEVICE = (dev >> MINOR) & BYTE; + m.POSITION = pos; + m.IO_ENDPT = proc_e; + m.ADDRESS = buf; + m.COUNT = bytes; + m.TTY_FLAGS = flags; + + /* Call the task. */ + (*dp->dmap_io)(dp->dmap_driver, &m); + + if(dp->dmap_driver == NONE) { + /* Driver has vanished. Wait for a new one. */ + for (;;) + { + r= receive(RS_PROC_NR, &m); + if (r != OK) + { + panic(__FILE__, + "dev_bio: unable to receive from RS", + r); + } + if (m.m_type == DEVCTL) + { + r= fs_devctl(m.ctl_req, m.dev_nr, m.driver_nr, + m.dev_style, m.m_force); + } + else + { + panic(__FILE__, + "dev_bio: got message from RS, type", + m.m_type); + } + m.m_type= r; + r= send(RS_PROC_NR, &m); + if (r != OK) + { + panic(__FILE__, + "dev_bio: unable to send to RS", + r); + } + if (dp->dmap_driver != NONE) + break; + } + printf("dev_bio: trying new driver\n"); + continue; + } + + /* Task has completed. See if call completed. */ + if (m.REP_STATUS == SUSPEND) { + panic(__FILE__, "dev_bio: driver returned SUSPEND", NO_NUM); + } + return(m.REP_STATUS); + } +} + /*===========================================================================* * dev_io * *===========================================================================*/ @@ -208,11 +293,6 @@ int flags; /* mode bits and flags */ printf("FS: gen_opcl: no driver for dev %x\n", dev); return ENXIO; } - if(isokendpt(dp->dmap_driver, &dummyproc) != OK) { - printf("FS: gen_opcl: old driver for dev %x (%d)\n", - dev, dp->dmap_driver); - return ENXIO; - } /* Call the task. */ (*dp->dmap_io)(dp->dmap_driver, &dev_mess); @@ -274,9 +354,10 @@ int flags; /* mode bits and flags */ } /*===========================================================================* - * do_setsid * + * pm_setsid * *===========================================================================*/ -PUBLIC int do_setsid() +PUBLIC void pm_setsid(proc_e) +int proc_e; { /* Perform the FS side of the SETSID call, i.e. 
get rid of the controlling * terminal of a process, and make the process a session leader. @@ -284,15 +365,11 @@ PUBLIC int do_setsid() register struct fproc *rfp; int slot; - /* Only MM may do the SETSID call directly. */ - if (who_e != PM_PROC_NR) return(ENOSYS); - /* Make the process a session leader with no controlling tty. */ - okendpt(m_in.endpt1, &slot); + okendpt(proc_e, &slot); rfp = &fproc[slot]; rfp->fp_sesldr = TRUE; rfp->fp_tty = 0; - return(OK); } /*===========================================================================* diff --git a/servers/fs/dmap.c b/servers/fs/dmap.c index 8f16a6488..bc33cc0b3 100644 --- a/servers/fs/dmap.c +++ b/servers/fs/dmap.c @@ -60,31 +60,47 @@ PRIVATE struct dmap init_dmap[] = { *===========================================================================*/ PUBLIC int do_devctl() { - int result, proc_nr_e, proc_nr_n; + return fs_devctl(m_in.ctl_req, m_in.dev_nr, m_in.driver_nr, + m_in.dev_style, m_in.m_force); +} + +/*===========================================================================* + * fs_devctl * + *===========================================================================*/ +PUBLIC int fs_devctl(req, dev, proc_nr_e, style, force) +int req; +int dev; +int proc_nr_e; +int style; +int force; +{ + int result, proc_nr_n; - switch(m_in.ctl_req) { + switch(req) { case DEV_MAP: - /* Check process number of new driver. */ - proc_nr_e= m_in.driver_nr; - if (isokendpt(proc_nr_e, &proc_nr_n) != OK) - return(EINVAL); + if (!force) + { + /* Check process number of new driver. */ + if (isokendpt(proc_nr_e, &proc_nr_n) != OK) + return(EINVAL); + } /* Try to update device mapping. */ - result = map_driver(m_in.dev_nr, proc_nr_e, m_in.dev_style); + result = map_driver(dev, proc_nr_e, style, force); if (result == OK) { /* If a driver has completed its exec(), it can be announced to be * up. 
*/ - if(fproc[proc_nr_n].fp_execced) { - dev_up(m_in.dev_nr); + if(force || fproc[proc_nr_n].fp_execced) { + dev_up(dev); } else { - dmap[m_in.dev_nr].dmap_flags |= DMAP_BABY; + dmap[dev].dmap_flags |= DMAP_BABY; } } break; case DEV_UNMAP: - result = map_driver(m_in.dev_nr, NONE, 0); + result = map_driver(dev, NONE, 0, 0); break; default: result = EINVAL; @@ -95,10 +111,11 @@ PUBLIC int do_devctl() /*===========================================================================* * map_driver * *===========================================================================*/ -PUBLIC int map_driver(major, proc_nr_e, style) +PUBLIC int map_driver(major, proc_nr_e, style, force) int major; /* major number of the device */ int proc_nr_e; /* process number of the driver */ int style; /* style of the device */ +int force; { /* Set a new device driver mapping in the dmap table. Given that correct * arguments are given, this only works if the entry is mutable and the @@ -131,9 +148,12 @@ int style; /* style of the device */ if (! (dp->dmap_flags & DMAP_MUTABLE)) return(EPERM); if (dp->dmap_flags & DMAP_BUSY) return(EBUSY); - /* Check process number of new driver. */ - if (isokendpt(proc_nr_e, &proc_nr_n) != OK) - return(EINVAL); + if (!force) + { + /* Check process number of new driver. */ + if (isokendpt(proc_nr_e, &proc_nr_n) != OK) + return(EINVAL); + } /* Try to update the entry. 
*/ switch (style) { @@ -156,7 +176,7 @@ PUBLIC void dmap_unmap_by_endpt(int proc_nr_e) int i, r; for (i=0; i +#include +#include +#include +#include +#include +#include +#include "buf.h" +#include "fproc.h" +#include "inode.h" +#include "param.h" +#include "super.h" + +FORWARD _PROTOTYPE( int exec_newmem, (int proc_e, vir_bytes text_bytes, + vir_bytes data_bytes, vir_bytes bss_bytes, vir_bytes tot_bytes, + vir_bytes frame_len, int sep_id, + Dev_t st_dev, ino_t st_ino, time_t st_ctime, char *progname, + int new_uid, int new_gid, + vir_bytes *stack_topp, int *load_textp, int *allow_setuidp) ); +FORWARD _PROTOTYPE( int read_header, (struct inode *rip, int *sep_id, + vir_bytes *text_bytes, vir_bytes *data_bytes, + vir_bytes *bss_bytes, phys_bytes *tot_bytes, vir_bytes *pc, + int *hdrlenp) ); +FORWARD _PROTOTYPE( int patch_stack, (struct inode *rip, + char stack[ARG_MAX], vir_bytes *stk_bytes) ); +FORWARD _PROTOTYPE( int insert_arg, (char stack[ARG_MAX], + vir_bytes *stk_bytes, char *arg, int replace) ); +FORWARD _PROTOTYPE( void patch_ptr, (char stack[ARG_MAX], + vir_bytes base) ); +FORWARD _PROTOTYPE( int read_seg, (struct inode *rip, off_t off, + int proc_e, int seg, phys_bytes seg_bytes) ); +FORWARD _PROTOTYPE( void clo_exec, (struct fproc *rfp) ); + +#define ESCRIPT (-2000) /* Returned by read_header for a #! script. */ +#define PTRSIZE sizeof(char *) /* Size of pointers in argv[] and envp[]. */ + +/*===========================================================================* + * pm_exec * + *===========================================================================*/ +PUBLIC int pm_exec(proc_e, path, path_len, frame, frame_len) +int proc_e; +char *path; +vir_bytes path_len; +char *frame; +vir_bytes frame_len; +{ +/* Perform the execve(name, argv, envp) call. The user library builds a + * complete stack image, including pointers, args, environ, etc. The stack + * is copied to a buffer inside FS, and then to the new core image. 
+ */ + int r, sep_id, round, proc_s, hdrlen, load_text, allow_setuid; + vir_bytes text_bytes, data_bytes, bss_bytes, pc; + phys_bytes tot_bytes; /* total space for program, including gap */ + vir_bytes stack_top, vsp; + off_t off; + uid_t new_uid; + gid_t new_gid; + struct fproc *rfp; + struct inode *rip; + char *cp; + char progname[PROC_NAME_LEN]; + + static char mbuf[ARG_MAX]; /* buffer for stack and zeroes */ + + okendpt(proc_e, &proc_s); + rfp= fp= &fproc[proc_s]; + who_e= proc_e; + who_p= proc_s; + super_user = (fp->fp_effuid == SU_UID ? TRUE : FALSE); /* su? */ + + /* Get the exec file name. */ + r= fetch_name(path, path_len, 0); + if (r != OK) + { + printf("pm_exec: fetch_name failed\n"); + return(r); /* file name not in user data segment */ + } + + /* Fetch the stack from the user before destroying the old core image. */ + if (frame_len > ARG_MAX) + { + printf("pm_exec: bad frame_len\n"); + return(ENOMEM); /* stack too big */ + } + r = sys_datacopy(proc_e, (vir_bytes) frame, + SELF, (vir_bytes) mbuf, (phys_bytes)frame_len); + /* can't fetch stack (e.g. bad virtual addr) */ + if (r != OK) + { + printf("pm_exec: sys_datacopy failed\n"); + return(r); + } + + /* The default is the keep the original user and group IDs */ + new_uid= rfp->fp_effuid; + new_gid= rfp->fp_effgid; + + for (round= 0; round < 2; round++) + /* round = 0 (first attempt), or 1 (interpreted script) */ + { + /* Save the name of the program */ + (cp= strrchr(user_path, '/')) ? 
cp++ : (cp= user_path); + + strncpy(progname, cp, PROC_NAME_LEN-1); + progname[PROC_NAME_LEN-1] = '\0'; + +#if 0 + printf("pm_exec: eat_path '%s'\n", user_path); +#endif + rip= eat_path(user_path); + if (rip == NIL_INODE) + { + return(err_code); + } + if ((rip->i_mode & I_TYPE) != I_REGULAR) + r = ENOEXEC; + else + r = forbidden(rip, X_BIT); /* check if file is executable */ + if (r != OK) { + put_inode(rip); + printf("pm_exec: bad executable\n"); + return(r); + } + + if (round == 0) + { + /* Deal with setuid/setgid executables */ + if (rip->i_mode & I_SET_UID_BIT) + new_uid = rip->i_uid; + if (rip->i_mode & I_SET_GID_BIT) + new_gid = rip->i_gid; + } + + /* Read the file header and extract the segment sizes. */ + r = read_header(rip, &sep_id, &text_bytes, &data_bytes, &bss_bytes, + &tot_bytes, &pc, &hdrlen); + if (r != ESCRIPT || round != 0) + break; + + /* Get fresh copy of the file name. */ + r= fetch_name(path, path_len, 0); + if (r != OK) + { + printf("pm_exec: 2nd fetch_name failed\n"); + put_inode(rip); + return(r); /* strange */ + } + r= patch_stack(rip, mbuf, &frame_len); + put_inode(rip); + if (r != OK) + { + printf("pm_exec: patch stack\n"); + return r; + } + } + + if (r != OK) + { + printf("pm_exec: returning ENOEXEC, r = %d\n", r); + return ENOEXEC; + } + + r= exec_newmem(proc_e, text_bytes, data_bytes, bss_bytes, tot_bytes, + frame_len, sep_id, rip->i_dev, rip->i_num, rip->i_ctime, + progname, new_uid, new_gid, &stack_top, &load_text, &allow_setuid); + if (r != OK) + { + printf("pm_exec: exec_newmap failed: %d\n", r); + put_inode(rip); + return r; + } + + /* Patch up stack and copy it from FS to new core image. */ + vsp = stack_top; + vsp -= frame_len; + patch_ptr(mbuf, vsp); + r = sys_datacopy(SELF, (vir_bytes) mbuf, + proc_e, (vir_bytes) vsp, (phys_bytes)frame_len); + if (r != OK) panic(__FILE__,"pm_exec stack copy err on", proc_e); + + off = hdrlen; + + /* Read in text and data segments. 
*/ + if (load_text) { + r= read_seg(rip, off, proc_e, T, text_bytes); + } + off += text_bytes; + if (r == OK) + r= read_seg(rip, off, proc_e, D, data_bytes); + + put_inode(rip); + + if (r != OK) return r; + + clo_exec(rfp); + + if (allow_setuid) + { + rfp->fp_effuid= new_uid; + rfp->fp_effgid= new_gid; + } + + /* This child has now exec()ced. */ + rfp->fp_execced = 1; + + /* Check if this is a driver that can now be useful. */ + dmap_endpt_up(rfp->fp_endpoint); + + return OK; +} + + +/*===========================================================================* + * exec_newmem * + *===========================================================================*/ +PRIVATE int exec_newmem(proc_e, text_bytes, data_bytes, bss_bytes, tot_bytes, + frame_len, sep_id, st_dev, st_ino, st_ctime, progname, + new_uid, new_gid, stack_topp, load_textp, allow_setuidp) +int proc_e; +vir_bytes text_bytes; +vir_bytes data_bytes; +vir_bytes bss_bytes; +vir_bytes tot_bytes; +vir_bytes frame_len; +int sep_id; +dev_t st_dev; +ino_t st_ino; +time_t st_ctime; +int new_uid; +int new_gid; +char *progname; +vir_bytes *stack_topp; +int *load_textp; +int *allow_setuidp; +{ + int r; + struct exec_newmem e; + message m; + + e.text_bytes= text_bytes; + e.data_bytes= data_bytes; + e.bss_bytes= bss_bytes; + e.tot_bytes= tot_bytes; + e.args_bytes= frame_len; + e.sep_id= sep_id; + e.st_dev= st_dev; + e.st_ino= st_ino; + e.st_ctime= st_ctime; + e.new_uid= new_uid; + e.new_gid= new_gid; + strncpy(e.progname, progname, sizeof(e.progname)-1); + e.progname[sizeof(e.progname)-1]= '\0'; + + m.m_type= EXEC_NEWMEM; + m.EXC_NM_PROC= proc_e; + m.EXC_NM_PTR= (char *)&e; + r= sendrec(PM_PROC_NR, &m); + if (r != OK) + return r; +#if 0 + printf("exec_newmem: r = %d, m_type = %d\n", r, m.m_type); +#endif + *stack_topp= m.m1_i1; + *load_textp= !!(m.m1_i2 & EXC_NM_RF_LOAD_TEXT); + *allow_setuidp= !!(m.m1_i2 & EXC_NM_RF_ALLOW_SETUID); +#if 0 + printf("exec_newmem: stack_top = 0x%x\n", *stack_topp); + printf("exec_newmem: 
load_text = %d\n", *load_textp); +#endif + return m.m_type; +} + + +/*===========================================================================* + * read_header * + *===========================================================================*/ +PRIVATE int read_header(rip, sep_id, text_bytes, data_bytes, bss_bytes, + tot_bytes, pc, hdrlenp) +struct inode *rip; /* inode for reading exec file */ +int *sep_id; /* true iff sep I&D */ +vir_bytes *text_bytes; /* place to return text size */ +vir_bytes *data_bytes; /* place to return initialized data size */ +vir_bytes *bss_bytes; /* place to return bss size */ +phys_bytes *tot_bytes; /* place to return total size */ +vir_bytes *pc; /* program entry point (initial PC) */ +int *hdrlenp; +{ +/* Read the header and extract the text, data, bss and total sizes from it. */ + off_t pos; + block_t b; + struct buf *bp; + struct exec hdr; /* a.out header is read in here */ + + /* Read the header and check the magic number. The standard MINIX header + * is defined in . It consists of 8 chars followed by 6 longs. + * Then come 4 more longs that are not used here. + * Byte 0: magic number 0x01 + * Byte 1: magic number 0x03 + * Byte 2: normal = 0x10 (not checked, 0 is OK), separate I/D = 0x20 + * Byte 3: CPU type, Intel 16 bit = 0x04, Intel 32 bit = 0x10, + * Motorola = 0x0B, Sun SPARC = 0x17 + * Byte 4: Header length = 0x20 + * Bytes 5-7 are not used. + * + * Now come the 6 longs + * Bytes 8-11: size of text segments in bytes + * Bytes 12-15: size of initialized data segment in bytes + * Bytes 16-19: size of bss in bytes + * Bytes 20-23: program entry point + * Bytes 24-27: total memory allocated to program (text, data + stack) + * Bytes 28-31: size of symbol table in bytes + * The longs are represented in a machine dependent order, + * little-endian on the 8088, big-endian on the 68000. + * The header is followed directly by the text and data segments, and the + * symbol table (if any). The sizes are given in the header. 
Only the + * text and data segments are copied into memory by exec. The header is + * used here only. The symbol table is for the benefit of a debugger and + * is ignored here. + */ + + pos= 0; /* Read from the start of the file */ + b = read_map(rip, pos); /* get block number */ + + if (b == 0) /* Hole */ + return ENOEXEC; + + bp = get_block(rip->i_dev, b, NORMAL); /* get block */ + + /* Interpreted script? */ + if (bp->b_data[0] == '#' && bp->b_data[1] == '!' && rip->i_size >= 2) + { + put_block(bp, FULL_DATA_BLOCK); + return ESCRIPT; + } + + memcpy(&hdr, bp->b_data, sizeof(hdr)); + put_block(bp, FULL_DATA_BLOCK); + + if (rip->i_size < A_MINHDR) return(ENOEXEC); + + /* Check magic number, cpu type, and flags. */ + if (BADMAG(hdr)) return(ENOEXEC); +#if (CHIP == INTEL && _WORD_SIZE == 2) + if (hdr.a_cpu != A_I8086) return(ENOEXEC); +#endif +#if (CHIP == INTEL && _WORD_SIZE == 4) + if (hdr.a_cpu != A_I80386) return(ENOEXEC); +#endif + if ((hdr.a_flags & ~(A_NSYM | A_EXEC | A_SEP)) != 0) return(ENOEXEC); + + *sep_id = !!(hdr.a_flags & A_SEP); /* separate I & D or not */ + + /* Get text and data sizes. */ + *text_bytes = (vir_bytes) hdr.a_text; /* text size in bytes */ + *data_bytes = (vir_bytes) hdr.a_data; /* data size in bytes */ + *bss_bytes = (vir_bytes) hdr.a_bss; /* bss size in bytes */ + *tot_bytes = hdr.a_total; /* total bytes to allocate for prog */ + if (*tot_bytes == 0) return(ENOEXEC); + + if (!*sep_id) { + /* If I & D space is not separated, it is all considered data. 
Text=0*/ + *data_bytes += *text_bytes; + *text_bytes = 0; + } + *pc = hdr.a_entry; /* initial address to start execution */ + *hdrlenp = hdr.a_hdrlen & BYTE; /* header length */ + + return(OK); +} + +/*===========================================================================* + * patch_stack * + *===========================================================================*/ +PRIVATE int patch_stack(rip, stack, stk_bytes) +struct inode *rip; /* pointer for open script file */ +char stack[ARG_MAX]; /* pointer to stack image within FS */ +vir_bytes *stk_bytes; /* size of initial stack */ +{ +/* Patch the argument vector to include the path name of the script to be + * interpreted, and all strings on the #! line. Returns the path name of + * the interpreter. + */ + enum { INSERT=FALSE, REPLACE=TRUE }; + int n; + off_t pos; + block_t b; + struct buf *bp; + char *sp, *interp = NULL; + + /* Make user_path the new argv[0]. */ + if (!insert_arg(stack, stk_bytes, user_path, REPLACE)) return(ENOMEM); + + pos= 0; /* Read from the start of the file */ + b = read_map(rip, pos); /* get block number */ + if (b == 0) /* Hole */ + return ENOEXEC; + + bp = get_block(rip->i_dev, b, NORMAL); /* get block */ + n= rip->i_size; + if (n > rip->i_sp->s_block_size) + n= rip->i_sp->s_block_size; + if (n < 2) + { + put_block(bp, FULL_DATA_BLOCK); + return ENOEXEC; + } + sp= bp->b_data+2; /* just behind the #! */ + n -= 2; + if (n > PATH_MAX) + n= PATH_MAX; + + /* Use the user_path variable for temporary storage */ + memcpy(user_path, sp, n); + put_block(bp, FULL_DATA_BLOCK); + + if ((sp= memchr(user_path, '\n', n)) == NULL) /* must be a proper line */ + return(ENOEXEC); + + /* Move sp backwards through script[], prepending each string to stack. */ + for (;;) { + /* skip spaces behind argument. */ + while (sp > user_path && (*--sp == ' ' || *sp == '\t')) {} + if (sp == user_path) break; + + sp[1] = 0; + /* Move to the start of the argument. 
*/ + while (sp > user_path && sp[-1] != ' ' && sp[-1] != '\t') --sp; + + interp = sp; + if (!insert_arg(stack, stk_bytes, sp, INSERT)) return(ENOMEM); + } + + /* Round *stk_bytes up to the size of a pointer for alignment contraints. */ + *stk_bytes= ((*stk_bytes + PTRSIZE - 1) / PTRSIZE) * PTRSIZE; + + if (interp != user_path) + memmove(user_path, interp, strlen(interp)+1); + return(OK); +} + +/*===========================================================================* + * insert_arg * + *===========================================================================*/ +PRIVATE int insert_arg(stack, stk_bytes, arg, replace) +char stack[ARG_MAX]; /* pointer to stack image within PM */ +vir_bytes *stk_bytes; /* size of initial stack */ +char *arg; /* argument to prepend/replace as new argv[0] */ +int replace; +{ +/* Patch the stack so that arg will become argv[0]. Be careful, the stack may + * be filled with garbage, although it normally looks like this: + * nargs argv[0] ... argv[nargs-1] NULL envp[0] ... NULL + * followed by the strings "pointed" to by the argv[i] and the envp[i]. The + * pointers are really offsets from the start of stack. + * Return true iff the operation succeeded. + */ + int offset, a0, a1, old_bytes = *stk_bytes; + + /* Prepending arg adds at least one string and a zero byte. */ + offset = strlen(arg) + 1; + + a0 = (int) ((char **) stack)[1]; /* argv[0] */ + if (a0 < 4 * PTRSIZE || a0 >= old_bytes) return(FALSE); + + a1 = a0; /* a1 will point to the strings to be moved */ + if (replace) { + /* Move a1 to the end of argv[0][] (argv[1] if nargs > 1). */ + do { + if (a1 == old_bytes) return(FALSE); + --offset; + } while (stack[a1++] != 0); + } else { + offset += PTRSIZE; /* new argv[0] needs new pointer in argv[] */ + a0 += PTRSIZE; /* location of new argv[0][]. 
*/ + } + + /* stack will grow by offset bytes (or shrink by -offset bytes) */ + if ((*stk_bytes += offset) > ARG_MAX) return(FALSE); + + /* Reposition the strings by offset bytes */ + memmove(stack + a1 + offset, stack + a1, old_bytes - a1); + + strcpy(stack + a0, arg); /* Put arg in the new space. */ + + if (!replace) { + /* Make space for a new argv[0]. */ + memmove(stack + 2 * PTRSIZE, stack + 1 * PTRSIZE, a0 - 2 * PTRSIZE); + + ((char **) stack)[0]++; /* nargs++; */ + } + /* Now patch up argv[] and envp[] by offset. */ + patch_ptr(stack, (vir_bytes) offset); + ((char **) stack)[1] = (char *) a0; /* set argv[0] correctly */ + return(TRUE); +} + +/*===========================================================================* + * patch_ptr * + *===========================================================================*/ +PRIVATE void patch_ptr(stack, base) +char stack[ARG_MAX]; /* pointer to stack image within PM */ +vir_bytes base; /* virtual address of stack base inside user */ +{ +/* When doing an exec(name, argv, envp) call, the user builds up a stack + * image with arg and env pointers relative to the start of the stack. Now + * these pointers must be relocated, since the stack is not positioned at + * address 0 in the user's address space. 
+ */ + + char **ap, flag; + vir_bytes v; + + flag = 0; /* counts number of 0-pointers seen */ + ap = (char **) stack; /* points initially to 'nargs' */ + ap++; /* now points to argv[0] */ + while (flag < 2) { + if (ap >= (char **) &stack[ARG_MAX]) return; /* too bad */ + if (*ap != NULL) { + v = (vir_bytes) *ap; /* v is relative pointer */ + v += base; /* relocate it */ + *ap = (char *) v; /* put it back */ + } else { + flag++; + } + ap++; + } +} + +/*===========================================================================* + * read_seg * + *===========================================================================*/ +PRIVATE int read_seg(rip, off, proc_e, seg, seg_bytes) +struct inode *rip; /* inode descriptor to read from */ +off_t off; /* offset in file */ +int proc_e; /* process number (endpoint) */ +int seg; /* T, D, or S */ +phys_bytes seg_bytes; /* how much is to be transferred? */ +{ +/* + * The byte count on read is usually smaller than the segment count, because + * a segment is padded out to a click multiple, and the data segment is only + * partially initialized. 
+ */ + + int r, block_size; + off_t n, o, b_off, seg_off; + block_t b; + struct buf *bp; + + /* Make sure that the file is big enough */ + if (rip->i_size < off+seg_bytes) + return EIO; + + block_size= rip->i_sp->s_block_size; + seg_off= 0; + for (o= off - (off % block_size); o < off+seg_bytes; o += block_size) + { + b= read_map(rip, o); + if (b == NO_BLOCK) + return EIO; /* Executables don't have holes */ + + bp = get_block(rip->i_dev, b, NORMAL); /* get block */ + if (o < off) + b_off= off-o; + else + b_off= 0; + n= block_size-b_off; + if (o+b_off+n > off+seg_bytes) + n= off+seg_bytes-(o+b_off); + r= sys_vircopy(SELF, D, (vir_bytes)bp->b_data+b_off, + proc_e, seg, seg_off, n); + put_block(bp, FULL_DATA_BLOCK); + + if (r != OK) + return r; + + seg_off += n; + } + + return OK; +} + + +/*===========================================================================* + * clo_exec * + *===========================================================================*/ +PRIVATE void clo_exec(rfp) +struct fproc *rfp; +{ +/* Files can be marked with the FD_CLOEXEC bit (in fp->fp_cloexec). + */ + int i, proc; + long bitmap; + + /* The array of FD_CLOEXEC bits is in the fp_cloexec bit map. */ + bitmap = rfp->fp_cloexec; + if (bitmap) { + /* Check the file desriptors one by one for presence of FD_CLOEXEC. */ + for (i = 0; i < OPEN_MAX; i++) { + if ( (bitmap >> i) & 01) (void) close_fd(rfp, i); + } + } +} + + diff --git a/servers/fs/filedes.c b/servers/fs/filedes.c index 85e1e2021..48548bfef 100644 --- a/servers/fs/filedes.c +++ b/servers/fs/filedes.c @@ -66,11 +66,23 @@ PUBLIC int get_fd(int start, mode_t bits, int *k, struct filp **fpt) PUBLIC struct filp *get_filp(fild) int fild; /* file descriptor */ { +/* See if 'fild' refers to a valid file descr. If so, return its filp ptr. 
*/ + + return get_filp2(fp, fild); +} + +/*===========================================================================* + * get_filp2 * + *===========================================================================*/ +PUBLIC struct filp *get_filp2(rfp, fild) +register struct fproc *rfp; +int fild; /* file descriptor */ +{ /* See if 'fild' refers to a valid file descr. If so, return its filp ptr. */ err_code = EBADF; if (fild < 0 || fild >= OPEN_MAX ) return(NIL_FILP); - return(fp->fp_filp[fild]); /* may also be NIL_FILP */ + return(rfp->fp_filp[fild]); /* may also be NIL_FILP */ } /*===========================================================================* diff --git a/servers/fs/main.c b/servers/fs/main.c index d6c4d54a2..5ed17d85a 100644 --- a/servers/fs/main.c +++ b/servers/fs/main.c @@ -34,6 +34,7 @@ struct super_block; /* proto.h needs to know this */ FORWARD _PROTOTYPE( void fs_init, (void) ); FORWARD _PROTOTYPE( void get_work, (void) ); FORWARD _PROTOTYPE( void init_root, (void) ); +FORWARD _PROTOTYPE( void service_pm, (void) ); /*===========================================================================* * main * @@ -48,23 +49,25 @@ PUBLIC int main() fs_init(); - /* This is the main loop that gets work, processes it, and sends replies. */ while (TRUE) { get_work(); /* sets who and call_nr */ fp = &fproc[who_p]; /* pointer to proc table struct */ super_user = (fp->fp_effuid == SU_UID ? TRUE : FALSE); /* su? */ + if (who_e == PM_PROC_NR && call_nr != PROC_EVENT) + printf("FS: strange, got message %d from PM\n", call_nr); + /* Check for special control messages first. */ if (call_nr == PROC_EVENT) { - /* Assume FS got signal. Synchronize, but don't exit. */ - do_sync(); + /* PM tries to get FS to do something */ + service_pm(); } else if (call_nr == SYN_ALARM) { /* Alarm timer expired. Used only for select(). Check it. */ fs_expire_timers(m_in.NOTIFY_TIMESTAMP); } else if ((call_nr & NOTIFY_MESSAGE)) { - /* Device notifies us of an event. 
*/ - dev_status(&m_in); + /* Device notifies us of an event. */ + dev_status(&m_in); } else { /* Call the internal function that does the work. */ if (call_nr < 0 || call_nr >= NCALLS) { @@ -123,6 +126,7 @@ PRIVATE void get_work() panic(__FILE__,"fs receive error", NO_NUM); who_e = m_in.m_source; who_p = _ENDPOINT_P(who_e); + if(who_p < -NR_TASKS || who_p >= NR_PROCS) panic(__FILE__,"receive process out of range", who_p); if(who_p >= 0 && fproc[who_p].fp_endpoint == NONE) { @@ -291,3 +295,123 @@ PRIVATE void init_root() sp->s_rd_only = 0; return; } + +/*===========================================================================* + * service_pm * + *===========================================================================*/ +PRIVATE void service_pm() +{ + int r, call; + message m; + + /* Ask PM for work until there is nothing left to do */ + for (;;) + { + m.m_type= PM_GET_WORK; + r= sendrec(PM_PROC_NR, &m); + if (r != OK) + { + panic("fs", "service_pm: sendrec failed", r); + } + if (m.m_type == PM_IDLE) + break; + call= m.m_type; + switch(call) + { + case PM_STIME: + boottime= m.PM_STIME_TIME; + + /* No need to report status to PM */ + break; + case PM_SETSID: + pm_setsid(m.PM_SETSID_PROC); + + /* No need to report status to PM */ + break; + + case PM_SETGID: + pm_setgid(m.PM_SETGID_PROC, m.PM_SETGID_EGID, + m.PM_SETGID_RGID); + + /* No need to report status to PM */ + break; + + case PM_SETUID: + pm_setuid(m.PM_SETUID_PROC, m.PM_SETUID_EGID, + m.PM_SETUID_RGID); + + /* No need to report status to PM */ + break; + + case PM_FORK: + pm_fork(m.PM_FORK_PPROC, m.PM_FORK_CPROC, + m.PM_FORK_CPID); + + /* No need to report status to PM */ + break; + + case PM_EXIT: + case PM_EXIT_TR: + pm_exit(m.PM_EXIT_PROC); + + /* Reply dummy status to PM for synchronization */ + m.m_type= (call == PM_EXIT_TR ? 
PM_EXIT_REPLY_TR : + PM_EXIT_REPLY); + /* Keep m.PM_EXIT_PROC */ + + r= send(PM_PROC_NR, &m); + if (r != OK) + panic(__FILE__, "service_pm: send failed", r); + break; + + case PM_UNPAUSE: + case PM_UNPAUSE_TR: + unpause(m.PM_UNPAUSE_PROC); + + /* No need to report status to PM */ + break; + + case PM_REBOOT: + pm_reboot(); + + /* Reply dummy status to PM for synchronization */ + m.m_type= PM_REBOOT_REPLY; + r= send(PM_PROC_NR, &m); + if (r != OK) + panic(__FILE__, "service_pm: send failed", r); + break; + + case PM_EXEC: + r= pm_exec(m.PM_EXEC_PROC, m.PM_EXEC_PATH, + m.PM_EXEC_PATH_LEN, m.PM_EXEC_FRAME, + m.PM_EXEC_FRAME_LEN); + + /* Reply status to PM */ + m.m_type= PM_EXEC_REPLY; + /* Keep m.PM_EXEC_PROC */ + m.PM_EXEC_STATUS= r; + + r= send(PM_PROC_NR, &m); + if (r != OK) + panic(__FILE__, "service_pm: send failed", r); + break; + + case PM_DUMPCORE: + r= pm_dumpcore(m.PM_CORE_PROC, + (struct mem_map *)m.PM_CORE_SEGPTR); + + /* Reply status to PM */ + m.m_type= PM_CORE_REPLY; + /* Keep m.PM_CORE_PROC */ + m.PM_CORE_STATUS= r; + + r= send(PM_PROC_NR, &m); + if (r != OK) + panic(__FILE__, "service_pm: send failed", r); + break; + + default: + panic("fs", "service_pm: unknown call", m.m_type); + } + } +} diff --git a/servers/fs/misc.c b/servers/fs/misc.c index 64087c171..3776e7431 100644 --- a/servers/fs/misc.c +++ b/servers/fs/misc.c @@ -7,14 +7,16 @@ * do_fcntl: perform the FCNTL system call * do_sync: perform the SYNC system call * do_fsync: perform the FSYNC system call - * do_reboot: sync disks and prepare for shutdown - * do_fork: adjust the tables after MM has performed a FORK system call + * pm_reboot: sync disks and prepare for shutdown + * pm_fork: adjust the tables after MM has performed a FORK system call * do_exec: handle files with FD_CLOEXEC on after MM has done an EXEC * do_exit: a process has exited; note that in the tables - * do_set: set uid or gid for some process + * pm_setgid: set group ids for some process + * pm_setuid: set user ids for some 
process * do_revive: revive a process that was waiting for something (e.g. TTY) * do_svrctl: file system control * do_getsysinfo: request copy of FS data structure + * pm_dumpcore: create a core dump */ #include "fs.h" @@ -23,6 +25,7 @@ #include #include #include +#include #include #include "buf.h" #include "file.h" @@ -31,7 +34,15 @@ #include "param.h" #include "super.h" -FORWARD _PROTOTYPE( int free_proc, (struct fproc *freed, int flags)); +#define CORE_NAME "core" +#define CORE_MODE 0777 /* mode to use on core image files */ + +FORWARD _PROTOTYPE( void free_proc, (struct fproc *freed, int flags)); +FORWARD _PROTOTYPE( int dumpcore, (int proc_e, struct mem_map *seg_ptr)); +FORWARD _PROTOTYPE( int write_bytes, (struct inode *rip, off_t off, + char *buf, size_t bytes)); +FORWARD _PROTOTYPE( int write_seg, (struct inode *rip, off_t off, int proc_e, + int seg, off_t seg_off, phys_bytes seg_bytes)); #define FP_EXITING 1 @@ -252,18 +263,15 @@ PUBLIC int do_fsync() } /*===========================================================================* - * do_reboot * + * pm_reboot * *===========================================================================*/ -PUBLIC int do_reboot() +PUBLIC void pm_reboot() { /* Perform the FS side of the reboot call. */ int i; struct super_block *sp; struct inode dummy; - /* Only PM may make this call directly. */ - if (who_e != PM_PROC_NR) return(EGENERIC); - /* Do exit processing for all leftover processes and servers, * but don't actually exit them (if they were really gone, PM * will tell us about it). @@ -291,35 +299,33 @@ PUBLIC int do_reboot() /* Sync any unwritten buffers. 
*/ do_sync(); - - return(OK); } /*===========================================================================* - * do_fork * + * pm_fork * *===========================================================================*/ -PUBLIC int do_fork() +PUBLIC void pm_fork(pproc, cproc, cpid) +int pproc; /* Parent process */ +int cproc; /* Child process */ +int cpid; /* Child process id */ { /* Perform those aspects of the fork() system call that relate to files. * In particular, let the child inherit its parent's file descriptors. * The parent and child parameters tell who forked off whom. The file - * system uses the same slot numbers as the kernel. Only MM makes this call. + * system uses the same slot numbers as the kernel. */ register struct fproc *cp; int i, parentno, childno; - /* Only PM may make this call directly. */ - if (who_e != PM_PROC_NR) return(EGENERIC); - /* Check up-to-dateness of fproc. */ - okendpt(m_in.parent_endpt, &parentno); + okendpt(pproc, &parentno); /* PM gives child endpoint, which implies process slot information. * Don't call isokendpt, because that will verify if the endpoint * number is correct in fproc, which it won't be. */ - childno = _ENDPOINT_P(m_in.child_endpt); + childno = _ENDPOINT_P(cproc); if(childno < 0 || childno >= NR_PROCS) panic(__FILE__, "FS: bogus child for forking", m_in.child_endpt); if(fproc[childno].fp_pid != PID_FREE) @@ -334,68 +340,24 @@ PUBLIC int do_fork() if (cp->fp_filp[i] != NIL_FILP) cp->fp_filp[i]->filp_count++; /* Fill in new process and endpoint id. */ - cp->fp_pid = m_in.pid; - cp->fp_endpoint = m_in.child_endpt; + cp->fp_pid = cpid; + cp->fp_endpoint = cproc; /* A child is not a process leader. */ cp->fp_sesldr = 0; /* This child has not exec()ced yet. */ cp->fp_execced = 0; -#if 0 -printf("do_fork: child %d, slot %d\n", m_in.child_endpt, cp-fproc); -#endif /* Record the fact that both root and working dir have another user. 
*/ dup_inode(cp->fp_rootdir); dup_inode(cp->fp_workdir); - return(OK); -} - -/*===========================================================================* - * do_exec * - *===========================================================================*/ -PUBLIC int do_exec() -{ -/* Files can be marked with the FD_CLOEXEC bit (in fp->fp_cloexec). When - * MM does an EXEC, it calls FS to allow FS to find these files and close them. - */ - - int i, proc; - long bitmap; - - /* Only PM may make this call directly. */ - if (who_e != PM_PROC_NR) return(EGENERIC); - - /* The array of FD_CLOEXEC bits is in the fp_cloexec bit map. */ - okendpt(m_in.endpt1, &proc); - fp = &fproc[proc]; /* get_filp() needs 'fp' */ - bitmap = fp->fp_cloexec; - if (bitmap) { - /* Check the file desriptors one by one for presence of FD_CLOEXEC. */ - for (i = 0; i < OPEN_MAX; i++) { - m_in.fd = i; - if ( (bitmap >> i) & 01) (void) do_close(); - } - } - - /* This child has now exec()ced. */ - fp->fp_execced = 1; - - /* Reply to caller (PM) directly. */ - reply(who_e, OK); - - /* Check if this is a driver that can now be useful. */ - dmap_endpt_up(fp->fp_endpoint); - - /* Suppress reply to caller (caller already replied to). */ - return SUSPEND; } /*===========================================================================* * free_proc * *===========================================================================*/ -PRIVATE int free_proc(struct fproc *exiter, int flags) +PRIVATE void free_proc(struct fproc *exiter, int flags) { int i, task; register struct fproc *rfp; @@ -408,15 +370,13 @@ PRIVATE int free_proc(struct fproc *exiter, int flags) if (fp->fp_suspended == SUSPENDED) { task = -fp->fp_task; if (task == XPIPE || task == XPOPEN) susp_count--; - m_in.ENDPT = fp->fp_endpoint; - (void) do_unpause(); /* this always succeeds for MM */ + unpause(fp->fp_endpoint); fp->fp_suspended = NOT_SUSPENDED; } /* Loop on file descriptors, closing any that are open. 
*/ for (i = 0; i < OPEN_MAX; i++) { - m_in.fd = i; - (void) do_close(); + (void) close_fd(fp, i); } /* Release root and working directories. */ @@ -436,7 +396,7 @@ PRIVATE int free_proc(struct fproc *exiter, int flags) * exit. */ if(!(flags & FP_EXITING)) - return OK; + return; dmap_unmap_by_endpt(fp->fp_endpoint); /* Invalidate endpoint number for error and sanity checks. */ @@ -467,53 +427,60 @@ PRIVATE int free_proc(struct fproc *exiter, int flags) /* Exit done. Mark slot as free. */ fp->fp_pid = PID_FREE; - return(OK); - } /*===========================================================================* - * do_exit * + * pm_exit * *===========================================================================*/ -PUBLIC int do_exit() +PUBLIC void pm_exit(proc) +int proc; { - int exitee_p, exitee_e; + int exitee_p; /* Perform the file system portion of the exit(status) system call. */ - /* Only PM may do the EXIT call directly. */ - if (who_e != PM_PROC_NR) return(EGENERIC); - /* Nevertheless, pretend that the call came from the user. */ - exitee_e = m_in.endpt1; - okendpt(exitee_e, &exitee_p); - return free_proc(&fproc[exitee_p], FP_EXITING); + okendpt(proc, &exitee_p); + free_proc(&fproc[exitee_p], FP_EXITING); } /*===========================================================================* - * do_set * + * pm_setgid * *===========================================================================*/ -PUBLIC int do_set() +PUBLIC void pm_setgid(proc_e, egid, rgid) +int proc_e; +int egid; +int rgid; { -/* Set uid_t or gid_t field. 
*/ + register struct fproc *tfp; + int slot; + + okendpt(proc_e, &slot); + tfp = &fproc[slot]; + + tfp->fp_effgid = egid; + tfp->fp_realgid = rgid; +} + +/*===========================================================================* + * pm_setuid * + *===========================================================================*/ +PUBLIC void pm_setuid(proc_e, euid, ruid) +int proc_e; +int euid; +int ruid; +{ register struct fproc *tfp; - int proc; + int slot; - /* Only PM may make this call directly. */ - if (who_e != PM_PROC_NR) return(EGENERIC); + okendpt(proc_e, &slot); + tfp = &fproc[slot]; - okendpt(m_in.endpt1, &proc); - tfp = &fproc[proc]; - if (call_nr == SETUID) { - tfp->fp_realuid = (uid_t) m_in.real_user_id; - tfp->fp_effuid = (uid_t) m_in.eff_user_id; - } - if (call_nr == SETGID) { - tfp->fp_effgid = (gid_t) m_in.eff_grp_id; - tfp->fp_realgid = (gid_t) m_in.real_grp_id; - } - return(OK); + tfp->fp_effuid = euid; + tfp->fp_realuid = ruid; } + /*===========================================================================* * do_revive * *===========================================================================*/ @@ -557,7 +524,7 @@ PUBLIC int do_svrctl() /* Try to update device mapping. 
*/ major = (device.dev >> MAJOR) & BYTE; - r=map_driver(major, who_e, device.style); + r=map_driver(major, who_e, device.style, 0 /* !force */); if (r == OK) { /* If a driver has completed its exec(), it can be announced @@ -581,10 +548,283 @@ PUBLIC int do_svrctl() (phys_bytes) sizeof(fdu))) != OK) return(r); major = (fdu.dev >> MAJOR) & BYTE; - r=map_driver(major, NONE, 0); + r=map_driver(major, NONE, 0, 0); return(r); } default: return(EINVAL); } } + + +/*===========================================================================* + * pm_dumpcore * + *===========================================================================*/ +PUBLIC int pm_dumpcore(proc_e, seg_ptr) +int proc_e; +struct mem_map *seg_ptr; +{ + int r, proc_s; + + r= dumpcore(proc_e, seg_ptr); + + /* Terminate the process */ + okendpt(proc_e, &proc_s); + free_proc(&fproc[proc_s], FP_EXITING); + + return r; +} + +/*===========================================================================* + * dumpcore * + *===========================================================================*/ +PRIVATE int dumpcore(proc_e, seg_ptr) +int proc_e; +struct mem_map *seg_ptr; +{ + int r, seg, proc_s, exists; + mode_t omode; + vir_bytes len; + off_t off, seg_off; + long trace_off, trace_data; + struct fproc *rfp; + struct inode *rip, *ldirp; + struct mem_map segs[NR_LOCAL_SEGS]; + + okendpt(proc_e, &proc_s); + rfp= fp= &fproc[proc_s]; + who_e= proc_e; + who_p= proc_s; + super_user = (fp->fp_effuid == SU_UID ? TRUE : FALSE); /* su? */ + + /* We need the equivalent of + * open(CORE_NAME, O_WRONLY|O_CREAT|O_TRUNC|O_NONBLOCK, CORE_MODE) + */ + + /* Create a new inode by calling new_node(). */ + omode = I_REGULAR | (CORE_MODE & ALL_MODES & rfp->fp_umask); + rip = new_node(&ldirp, CORE_NAME, omode, NO_ZONE, 0, NULL); + r = err_code; + put_inode(ldirp); + exists= (r == EEXIST); + if (r != OK && r != EEXIST) return(r); /* error */ + + /* Only do the normal open code if we didn't just create the file. 
*/ + if (exists) { + /* Check protections. */ + r = forbidden(rip, W_BIT); + if (r != OK) + { + put_inode(rip); + return r; + } + + /* Make sure it is a regular file */ + switch (rip->i_mode & I_TYPE) { + case I_REGULAR: + break; + + case I_DIRECTORY: + /* Directories may be read but not written. */ + r = EISDIR; + break; + + case I_CHAR_SPECIAL: + case I_BLOCK_SPECIAL: + case I_NAMED_PIPE: + r = EPERM; + break; + } + + if (r != OK) + { + put_inode(rip); + return r; + } + + /* Truncate the file */ + truncate_inode(rip, 0); + wipe_inode(rip); + /* Send the inode from the inode cache to the + * block cache, so it gets written on the next + * cache flush. + */ + rw_inode(rip, WRITING); + } + + /* Copy segments from PM */ + r= sys_datacopy(PM_PROC_NR, (vir_bytes)seg_ptr, + SELF, (vir_bytes)segs, sizeof(segs)); + if (r != OK) panic(__FILE__, "dumpcore: cannot copy segment info", r); + + off= 0; + r= write_bytes(rip, off, (char *)segs, sizeof(segs)); + if (r != OK) + { + put_inode(rip); + return r; + } + off += sizeof(segs); + + /* Write out the whole kernel process table entry to get the regs. */ + for (trace_off= 0;; trace_off += sizeof(long)) + { + r= sys_trace(T_GETUSER, proc_e, trace_off, &trace_data); + if (r != OK) + { + printf("dumpcore: sys_trace failed at offset %d: %d\n", + trace_off, r); + break; + } + r= write_bytes(rip, off, (char *)&trace_data, + sizeof(trace_data)); + if (r != OK) + { + put_inode(rip); + return r; + } + off += sizeof(trace_data); + } + + /* Loop through segments and write the segments themselves out. 
*/ + for (seg = 0; seg < NR_LOCAL_SEGS; seg++) { + len= segs[seg].mem_len << CLICK_SHIFT; + seg_off= segs[seg].mem_vir << CLICK_SHIFT; + r= write_seg(rip, off, proc_e, seg, seg_off, len); + if (r != OK) + { + put_inode(rip); + return r; + } + off += len; + } + + rip->i_size= off; + rip->i_dirt = DIRTY; + + put_inode(rip); + return OK; +} + + +/*===========================================================================* + * write_bytes * + *===========================================================================*/ +PRIVATE int write_bytes(rip, off, buf, bytes) +struct inode *rip; /* inode descriptor to read from */ +off_t off; /* offset in file */ +char *buf; +size_t bytes; /* how much is to be transferred? */ +{ + int r, block_size; + off_t n, o, b_off; + block_t b; + struct buf *bp; + + block_size= rip->i_sp->s_block_size; + for (o= off - (off % block_size); o < off+bytes; o += block_size) + { + if (o < off) + b_off= off-o; + else + b_off= 0; + n= block_size-b_off; + if (o+b_off+n > off+bytes) + n= off+bytes-(o+b_off); + + b = read_map(rip, o); + + if (b == NO_BLOCK) { + /* Writing to a nonexistent block. Create and enter + * in inode. + */ + if ((bp= new_block(rip, o)) == NIL_BUF) + return(err_code); + } + else + { + /* Just read the block, no need to optimize for + * writing entire blocks. + */ + bp = get_block(rip->i_dev, b, NORMAL); + } + + if (n != block_size && o >= rip->i_size && b_off == 0) { + zero_block(bp); + } + + /* Copy a chunk from user space to the block buffer. 
*/ + memcpy((bp->b_data+b_off), buf, n); + bp->b_dirt = DIRTY; + if (b_off + n == block_size) + put_block(bp, FULL_DATA_BLOCK); + else + put_block(bp, PARTIAL_DATA_BLOCK); + + buf += n; + } + + return OK; +} + +/*===========================================================================* + * write_seg * + *===========================================================================*/ +PRIVATE int write_seg(rip, off, proc_e, seg, seg_off, seg_bytes) +struct inode *rip; /* inode descriptor to read from */ +off_t off; /* offset in file */ +int proc_e; /* process number (endpoint) */ +int seg; /* T, D, or S */ +off_t seg_off; /* Offset in segment */ +phys_bytes seg_bytes; /* how much is to be transferred? */ +{ + int r, block_size, fl; + off_t n, o, b_off; + block_t b; + struct buf *bp; + + block_size= rip->i_sp->s_block_size; + for (o= off - (off % block_size); o < off+seg_bytes; o += block_size) + { + if (o < off) + b_off= off-o; + else + b_off= 0; + n= block_size-b_off; + if (o+b_off+n > off+seg_bytes) + n= off+seg_bytes-(o+b_off); + + b = read_map(rip, o); + if (b == NO_BLOCK) { + /* Writing to a nonexistent block. Create and enter in inode.*/ + if ((bp= new_block(rip, o)) == NIL_BUF) + return(err_code); + } else { + /* Normally an existing block to be partially overwritten is + * first read in. However, a full block need not be read in. + * If it is already in the cache, acquire it, otherwise just + * acquire a free buffer. + */ + fl = (n == block_size ? NO_READ : NORMAL); + bp = get_block(rip->i_dev, b, fl); + } + + if (n != block_size && o >= rip->i_size && b_off == 0) { + zero_block(bp); + } + + /* Copy a chunk from user space to the block buffer. */ + r = sys_vircopy(proc_e, seg, (phys_bytes) seg_off, + FS_PROC_NR, D, (phys_bytes) (bp->b_data+b_off), + (phys_bytes) n); + bp->b_dirt = DIRTY; + fl = (b_off + n == block_size ? 
FULL_DATA_BLOCK : PARTIAL_DATA_BLOCK); + put_block(bp, fl); + + seg_off += n; + } + + return OK; +} + + diff --git a/servers/fs/open.c b/servers/fs/open.c index a4f2cdbc4..85f24a385 100644 --- a/servers/fs/open.c +++ b/servers/fs/open.c @@ -8,6 +8,7 @@ * do_mkdir: perform the MKDIR system call * do_close: perform the CLOSE system call * do_lseek: perform the LSEEK system call + * new_node: create a new file, directory, etc. */ #include "fs.h" @@ -31,8 +32,6 @@ PRIVATE char mode_map[] = {R_BIT, W_BIT, R_BIT|W_BIT, 0}; FORWARD _PROTOTYPE( int common_open, (int oflags, mode_t omode) ); FORWARD _PROTOTYPE( int pipe_open, (struct inode *rip,mode_t bits,int oflags)); -FORWARD _PROTOTYPE( struct inode *new_node, (struct inode **ldirp, - char *path, mode_t bits, zone_t z0, int opaque, char *string)); /*===========================================================================* * do_creat * @@ -200,7 +199,7 @@ PRIVATE int common_open(register int oflags, mode_t omode) /*===========================================================================* * new_node * *===========================================================================*/ -PRIVATE struct inode *new_node(struct inode **ldirp, +PUBLIC struct inode *new_node(struct inode **ldirp, char *path, mode_t bits, zone_t z0, int opaque, char *parsed) { /* New_node() is called by common_open(), do_mknod(), and do_mkdir(). @@ -389,6 +388,17 @@ PUBLIC int do_mkdir() PUBLIC int do_close() { /* Perform the close(fd) system call. */ + return close_fd(fp, m_in.fd); +} + +/*===========================================================================* + * close_fd * + *===========================================================================*/ +PUBLIC int close_fd(rfp, fd_nr) +struct fproc *rfp; +int fd_nr; +{ +/* Close a filedescriptor for a process. */ register struct filp *rfilp; register struct inode *rip; @@ -397,7 +407,7 @@ PUBLIC int do_close() dev_t dev; /* First locate the inode that belongs to the file descriptor. 
*/ - if ( (rfilp = get_filp(m_in.fd)) == NIL_FILP) return(err_code); + if ( (rfilp = get_filp2(rfp, fd_nr)) == NIL_FILP) return(err_code); rip = rfilp->filp_ino; /* 'rip' points to the inode */ if (rfilp->filp_count - 1 == 0 && rfilp->filp_mode != FILP_CLOSED) { @@ -440,16 +450,16 @@ PUBLIC int do_close() put_inode(rip); } - fp->fp_cloexec &= ~(1L << m_in.fd); /* turn off close-on-exec bit */ - fp->fp_filp[m_in.fd] = NIL_FILP; - FD_CLR(m_in.fd, &fp->fp_filp_inuse); + rfp->fp_cloexec &= ~(1L << fd_nr); /* turn off close-on-exec bit */ + rfp->fp_filp[fd_nr] = NIL_FILP; + FD_CLR(fd_nr, &rfp->fp_filp_inuse); /* Check to see if the file is locked. If so, release all locks. */ if (nr_locks == 0) return(OK); lock_count = nr_locks; /* save count of locks */ for (flp = &file_lock[0]; flp < &file_lock[NR_LOCKS]; flp++) { if (flp->lock_type == 0) continue; /* slot not in use */ - if (flp->lock_inode == rip && flp->lock_pid == fp->fp_pid) { + if (flp->lock_inode == rip && flp->lock_pid == rfp->fp_pid) { flp->lock_type = 0; nr_locks--; } diff --git a/servers/fs/param.h b/servers/fs/param.h index 883f2bab3..2ea84d819 100644 --- a/servers/fs/param.h +++ b/servers/fs/param.h @@ -34,6 +34,7 @@ #define driver_nr m4_l2 #define dev_nr m4_l3 #define dev_style m4_l4 +#define m_force m4_l5 #define rd_only m1_i3 #define real_user_id m1_i2 #define request m1_i2 diff --git a/servers/fs/pipe.c b/servers/fs/pipe.c index 8adaa607c..4282f8eef 100644 --- a/servers/fs/pipe.c +++ b/servers/fs/pipe.c @@ -328,18 +328,32 @@ int returned; /* if hanging on task, how many bytes read */ *===========================================================================*/ PUBLIC int do_unpause() { +/* A signal has been sent to a user who is paused on the file system. + * Abort the system call with the EINTR error message. 
+ */ + int proc_nr_e; + + if (who_e != PM_PROC_NR) return(EPERM); + proc_nr_e = m_in.ENDPT; + return unpause(proc_nr_e); +} + +/*===========================================================================* + * unpause * + *===========================================================================*/ +PUBLIC int unpause(proc_nr_e) +int proc_nr_e; +{ /* A signal has been sent to a user who is paused on the file system. * Abort the system call with the EINTR error message. */ register struct fproc *rfp; - int proc_nr_e, proc_nr_p, task, fild; + int proc_nr_p, task, fild; struct filp *f; dev_t dev; message mess; - if (who_e != PM_PROC_NR) return(EPERM); - proc_nr_e = m_in.ENDPT; okendpt(proc_nr_e, &proc_nr_p); rfp = &fproc[proc_nr_p]; if (rfp->fp_suspended == NOT_SUSPENDED) return(OK); diff --git a/servers/fs/proto.h b/servers/fs/proto.h index 61c606b47..2ef7c6679 100644 --- a/servers/fs/proto.h +++ b/servers/fs/proto.h @@ -5,6 +5,7 @@ /* Structs used in prototypes must be declared as such first. 
*/ struct buf; struct filp; +struct fproc; struct inode; struct super_block; @@ -29,6 +30,8 @@ _PROTOTYPE( void invalidate2, (Dev_t device) ); /* device.c */ _PROTOTYPE( int dev_open, (Dev_t dev, int proc, int flags) ); _PROTOTYPE( void dev_close, (Dev_t dev) ); +_PROTOTYPE( int dev_bio, (int op, Dev_t dev, int proc, void *buf, + off_t pos, int bytes, int flags) ); _PROTOTYPE( int dev_io, (int op, Dev_t dev, int proc, void *buf, off_t pos, int bytes, int flags) ); _PROTOTYPE( int gen_opcl, (int op, Dev_t dev, int proc, int flags) ); @@ -40,7 +43,7 @@ _PROTOTYPE( int ctty_opcl, (int op, Dev_t dev, int proc, int flags) ); _PROTOTYPE( int clone_opcl, (int op, Dev_t dev, int proc, int flags) ); _PROTOTYPE( int ctty_io, (int task_nr, message *mess_ptr) ); _PROTOTYPE( int do_ioctl, (void) ); -_PROTOTYPE( int do_setsid, (void) ); +_PROTOTYPE( void pm_setsid, (int proc_e) ); _PROTOTYPE( void dev_status, (message *) ); _PROTOTYPE( void dev_up, (int major) ); @@ -49,16 +52,24 @@ _PROTOTYPE( int do_fkey_pressed, (void) ); /* dmap.c */ _PROTOTYPE( int do_devctl, (void) ); +_PROTOTYPE( int fs_devctl, (int req, int dev, int proc_nr_e, int style, + int force) ); _PROTOTYPE( void build_dmap, (void) ); -_PROTOTYPE( int map_driver, (int major, int proc_nr, int dev_style) ); +_PROTOTYPE( int map_driver, (int major, int proc_nr, int dev_style, + int force) ); _PROTOTYPE( int dmap_driver_match, (int proc, int major) ); _PROTOTYPE( void dmap_unmap_by_endpt, (int proc_nr) ); _PROTOTYPE( void dmap_endpt_up, (int proc_nr) ); +/* exec.c */ +_PROTOTYPE( int pm_exec, (int proc_e, char *path, vir_bytes path_len, + char *frame, vir_bytes frame_len) ); + /* filedes.c */ _PROTOTYPE( struct filp *find_filp, (struct inode *rip, mode_t bits) ); _PROTOTYPE( int get_fd, (int start, mode_t bits, int *k, struct filp **fpt) ); _PROTOTYPE( struct filp *get_filp, (int fild) ); +_PROTOTYPE( struct filp *get_filp2, (struct fproc *rfp, int fild) ); _PROTOTYPE( int inval_filp, (struct filp *) ); /* inode.c */ 
@@ -90,17 +101,18 @@ _PROTOTYPE( void reply, (int whom, int result) ); /* misc.c */ _PROTOTYPE( int do_dup, (void) ); -_PROTOTYPE( int do_exit, (void) ); +_PROTOTYPE( void pm_exit, (int proc) ); _PROTOTYPE( int do_fcntl, (void) ); -_PROTOTYPE( int do_fork, (void) ); -_PROTOTYPE( int do_exec, (void) ); +_PROTOTYPE( void pm_fork, (int pproc, int cproc, int cpid) ); _PROTOTYPE( int do_revive, (void) ); -_PROTOTYPE( int do_set, (void) ); +_PROTOTYPE( void pm_setgid, (int proc_e, int egid, int rgid) ); +_PROTOTYPE( void pm_setuid, (int proc_e, int euid, int ruid) ); _PROTOTYPE( int do_sync, (void) ); _PROTOTYPE( int do_fsync, (void) ); -_PROTOTYPE( int do_reboot, (void) ); +_PROTOTYPE( void pm_reboot, (void) ); _PROTOTYPE( int do_svrctl, (void) ); _PROTOTYPE( int do_getsysinfo, (void) ); +_PROTOTYPE( int pm_dumpcore, (int proc_e, struct mem_map *seg_ptr) ); /* mount.c */ _PROTOTYPE( int do_mount, (void) ); @@ -109,12 +121,15 @@ _PROTOTYPE( int unmount, (Dev_t dev) ); /* open.c */ _PROTOTYPE( int do_close, (void) ); +_PROTOTYPE( int close_fd, (struct fproc *rfp, int fd_nr) ); _PROTOTYPE( int do_creat, (void) ); _PROTOTYPE( int do_lseek, (void) ); _PROTOTYPE( int do_mknod, (void) ); _PROTOTYPE( int do_mkdir, (void) ); _PROTOTYPE( int do_open, (void) ); -_PROTOTYPE( int do_slink, (void) ); +_PROTOTYPE( int do_slink, (void) ); +_PROTOTYPE( struct inode *new_node, (struct inode **ldirp, + char *path, mode_t bits, zone_t z0, int opaque, char *string) ); /* path.c */ _PROTOTYPE( struct inode *advance,(struct inode **dirp, char string[NAME_MAX])); @@ -128,6 +143,7 @@ _PROTOTYPE( struct inode *parse_path, (char *path, char string[NAME_MAX], /* pipe.c */ _PROTOTYPE( int do_pipe, (void) ); _PROTOTYPE( int do_unpause, (void) ); +_PROTOTYPE( int unpause, (int proc_nr_e) ); _PROTOTYPE( int pipe_check, (struct inode *rip, int rw_flag, int oflags, int bytes, off_t position, int *canwrite, int notouch)); _PROTOTYPE( void release, (struct inode *ip, int call_nr, int count) ); @@ -211,6 
+227,3 @@ _PROTOTYPE( void fs_set_timer, (timer_t *tp, int delta, tmr_func_t watchdog, int _PROTOTYPE( void fs_expire_timers, (clock_t now) ); _PROTOTYPE( void fs_cancel_timer, (timer_t *tp) ); _PROTOTYPE( void fs_init_timer, (timer_t *tp) ); - -/* cdprobe.c */ -_PROTOTYPE( int cdprobe, (void) ); diff --git a/servers/fs/table.c b/servers/fs/table.c index 7c521708d..94166d844 100644 --- a/servers/fs/table.c +++ b/servers/fs/table.c @@ -16,8 +16,8 @@ PUBLIC _PROTOTYPE (int (*call_vec[]), (void) ) = { no_sys, /* 0 = unused */ - do_exit, /* 1 = exit */ - do_fork, /* 2 = fork */ + no_sys, /* 1 = (exit) */ + no_sys, /* 2 = (fork) */ do_read, /* 3 = read */ do_write, /* 4 = write */ do_open, /* 5 = open */ @@ -38,7 +38,7 @@ PUBLIC _PROTOTYPE (int (*call_vec[]), (void) ) = { no_sys, /* 20 = getpid */ do_mount, /* 21 = mount */ do_umount, /* 22 = umount */ - do_set, /* 23 = setuid */ + no_sys, /* 23 = (setuid) */ no_sys, /* 24 = getuid */ do_stime, /* 25 = stime */ no_sys, /* 26 = ptrace */ @@ -61,7 +61,7 @@ PUBLIC _PROTOTYPE (int (*call_vec[]), (void) ) = { no_sys, /* 43 = times */ no_sys, /* 44 = (prof) */ do_slink, /* 45 = symlink */ - do_set, /* 46 = setgid */ + no_sys, /* 46 = (setgid) */ no_sys, /* 47 = getgid */ no_sys, /* 48 = (signal)*/ do_rdlink, /* 49 = readlink*/ @@ -74,10 +74,10 @@ PUBLIC _PROTOTYPE (int (*call_vec[]), (void) ) = { no_sys, /* 56 = (mpx) */ no_sys, /* 57 = unused */ no_sys, /* 58 = unused */ - do_exec, /* 59 = execve */ + no_sys, /* 59 = (execve) */ do_umask, /* 60 = umask */ do_chroot, /* 61 = chroot */ - do_setsid, /* 62 = setsid */ + no_sys, /* 62 = (setsid) */ no_sys, /* 63 = getpgrp */ no_sys, /* 64 = KSIG: signals originating in the kernel */ @@ -92,7 +92,7 @@ PUBLIC _PROTOTYPE (int (*call_vec[]), (void) ) = { no_sys, /* 73 = sigpending */ no_sys, /* 74 = sigprocmask */ no_sys, /* 75 = sigreturn */ - do_reboot, /* 76 = reboot */ + no_sys, /* 76 = (reboot) */ do_svrctl, /* 77 = svrctl */ no_sys, /* 78 = unused */ diff --git 
a/servers/pm/break.c b/servers/pm/break.c index b57aaa464..527c784f6 100644 --- a/servers/pm/break.c +++ b/servers/pm/break.c @@ -24,7 +24,7 @@ #define STACK_CHANGED 2 /* flag value when stack size changed */ /*===========================================================================* - * do_brk * + * do_brk * *===========================================================================*/ PUBLIC int do_brk() { @@ -57,7 +57,7 @@ PUBLIC int do_brk() } /*===========================================================================* - * adjust * + * adjust * *===========================================================================*/ PUBLIC int adjust(rmp, data_clicks, sp) register struct mproc *rmp; /* whose memory is being adjusted? */ @@ -79,12 +79,13 @@ vir_bytes sp; /* new value of sp */ mem_sp = &rmp->mp_seg[S]; /* pointer to stack segment map */ changed = 0; /* set when either segment changed */ - if (mem_sp->mem_len == 0) return(OK); /* don't bother init */ - /* See if stack size has gone negative (i.e., sp too close to 0xFFFF...) */ base_of_stack = (long) mem_sp->mem_vir + (long) mem_sp->mem_len; sp_click = sp >> CLICK_SHIFT; /* click containing sp */ - if (sp_click >= base_of_stack) return(ENOMEM); /* sp too high */ + if (sp_click >= base_of_stack) + { + return(ENOMEM); /* sp too high */ + } /* Compute size of gap between stack and data segments. */ delta = (long) mem_sp->mem_vir - (long) sp_click; @@ -94,7 +95,10 @@ vir_bytes sp; /* new value of sp */ #define SAFETY_BYTES (384 * sizeof(char *)) #define SAFETY_CLICKS ((SAFETY_BYTES + CLICK_SIZE - 1) / CLICK_SIZE) gap_base = mem_dp->mem_vir + data_clicks + SAFETY_CLICKS; - if (lower < gap_base) return(ENOMEM); /* data and stack collided */ + if (lower < gap_base) + { + return(ENOMEM); /* data and stack collided */ + } /* Update data length (but not data orgin) on behalf of brk() system call. 
*/ old_clicks = mem_dp->mem_len; diff --git a/servers/pm/const.h b/servers/pm/const.h index 7c62a6839..200dfe837 100644 --- a/servers/pm/const.h +++ b/servers/pm/const.h @@ -19,3 +19,5 @@ #define PM_PID 0 /* PM's process id number */ #define INIT_PID 1 /* INIT's process id number */ +#define DUMPED 0200 /* bit set in status when core dumped */ + diff --git a/servers/pm/exec.c b/servers/pm/exec.c index 504d147c7..0190bb7b9 100644 --- a/servers/pm/exec.c +++ b/servers/pm/exec.c @@ -12,7 +12,9 @@ * * The entry points into this file are: * do_exec: perform the EXEC system call - * rw_seg: read or write a segment from or to a file + * exec_newmem: allocate new memory map for a process that tries to exec + * do_execrestart: finish the special exec call for RS + * exec_restart: finish a regular exec call * find_share: find a process whose text segment can be shared */ @@ -27,18 +29,9 @@ #include "mproc.h" #include "param.h" -FORWARD _PROTOTYPE( int new_mem, (struct mproc *sh_mp, vir_bytes text_bytes, - vir_bytes data_bytes, vir_bytes bss_bytes, - vir_bytes stk_bytes, phys_bytes tot_bytes) ); -FORWARD _PROTOTYPE( void patch_ptr, (char stack[ARG_MAX], vir_bytes base) ); -FORWARD _PROTOTYPE( int insert_arg, (char stack[ARG_MAX], - vir_bytes *stk_bytes, char *arg, int replace) ); -FORWARD _PROTOTYPE( char *patch_stack, (int fd, char stack[ARG_MAX], - vir_bytes *stk_bytes, char *script) ); -FORWARD _PROTOTYPE( int read_header, (int fd, int *ft, vir_bytes *text_bytes, - vir_bytes *data_bytes, vir_bytes *bss_bytes, - phys_bytes *tot_bytes, long *sym_bytes, vir_clicks sc, - vir_bytes *pc) ); +FORWARD _PROTOTYPE( int new_mem, (struct mproc *rmp, struct mproc *sh_mp, + vir_bytes text_bytes, vir_bytes data_bytes, vir_bytes bss_bytes, + vir_bytes stk_bytes, phys_bytes tot_bytes) ); #define ESCRIPT (-2000) /* Returned by read_header for a #! script. */ #define PTRSIZE sizeof(char *) /* Size of pointers in argv[] and envp[]. 
*/ @@ -48,250 +41,236 @@ FORWARD _PROTOTYPE( int read_header, (int fd, int *ft, vir_bytes *text_bytes, *===========================================================================*/ PUBLIC int do_exec() { -/* Perform the execve(name, argv, envp) call. The user library builds a - * complete stack image, including pointers, args, environ, etc. The stack - * is copied to a buffer inside PM, and then to the new core image. - */ - register struct mproc *rmp; - struct mproc *sh_mp; - int m, r, r2, fd, ft, sn; - static char mbuf[ARG_MAX]; /* buffer for stack and zeroes */ - static char name_buf[PATH_MAX]; /* the name of the file to exec */ - char *new_sp, *name, *basename; - vir_bytes src, dst, text_bytes, data_bytes, bss_bytes, stk_bytes, vsp; - phys_bytes tot_bytes; /* total space for program, including gap */ - long sym_bytes; - vir_clicks sc; - struct stat s_buf[2], *s_p; - vir_bytes pc; - - /* Do some validity checks. */ - rmp = mp; - stk_bytes = (vir_bytes) m_in.stack_bytes; - if (stk_bytes > ARG_MAX) return(ENOMEM); /* stack too big */ - if (m_in.exec_len <= 0 || m_in.exec_len > PATH_MAX) return(EINVAL); - - /* Get the exec file name and see if the file is executable. */ - src = (vir_bytes) m_in.exec_name; - dst = (vir_bytes) name_buf; - r = sys_datacopy(who_e, (vir_bytes) src, - PM_PROC_NR, (vir_bytes) dst, (phys_bytes) m_in.exec_len); - if (r != OK) return(r); /* file name not in user data segment */ - - /* Fetch the stack from the user before destroying the old core image. */ - src = (vir_bytes) m_in.stack_ptr; - dst = (vir_bytes) mbuf; - r = sys_datacopy(who_e, (vir_bytes) src, - PM_PROC_NR, (vir_bytes) dst, (phys_bytes)stk_bytes); - /* can't fetch stack (e.g. bad virtual addr) */ - if (r != OK) return(EACCES); - - r = 0; /* r = 0 (first attempt), or 1 (interpreted script) */ - name = name_buf; /* name of file to exec. 
*/ - do { - s_p = &s_buf[r]; - tell_fs(CHDIR, who_e, FALSE, 0); /* switch to the user's FS environ */ - fd = allowed(name, s_p, X_BIT); /* is file executable? */ - if (fd < 0) return(fd); /* file was not executable */ - - /* Read the file header and extract the segment sizes. */ - sc = (stk_bytes + CLICK_SIZE - 1) >> CLICK_SHIFT; - - m = read_header(fd, &ft, &text_bytes, &data_bytes, &bss_bytes, - &tot_bytes, &sym_bytes, sc, &pc); - if (m != ESCRIPT || ++r > 1) break; - } while ((name = patch_stack(fd, mbuf, &stk_bytes, name_buf)) != NULL); - - if (m < 0) { - close(fd); /* something wrong with header */ - return(stk_bytes > ARG_MAX ? ENOMEM : ENOEXEC); - } - - /* Can the process' text be shared with that of one already running? */ - sh_mp = find_share(rmp, s_p->st_ino, s_p->st_dev, s_p->st_ctime); - - /* Allocate new memory and release old memory. Fix map and tell kernel. */ - r = new_mem(sh_mp, text_bytes, data_bytes, bss_bytes, stk_bytes, tot_bytes); - if (r != OK) { - close(fd); /* insufficient core or program too big */ - return(r); - } + int r; + + /* Save parameters */ + mp->mp_exec_path= m_in.exec_name; + mp->mp_exec_path_len= m_in.exec_len; + mp->mp_exec_frame= m_in.stack_ptr; + mp->mp_exec_frame_len= m_in.stack_bytes; + + /* Forward call to FS */ + if (mp->mp_fs_call != PM_IDLE) + { + panic(__FILE__, "do_exec: not idle", mp->mp_fs_call); + } + mp->mp_fs_call= PM_EXEC; + r= notify(FS_PROC_NR); + if (r != OK) + panic(__FILE__, "do_getset: unable to notify FS", r); - /* Save file identification to allow it to be shared. */ - rmp->mp_ino = s_p->st_ino; - rmp->mp_dev = s_p->st_dev; - rmp->mp_ctime = s_p->st_ctime; - - /* Patch up stack and copy it from PM to new core image. 
*/ - vsp = (vir_bytes) rmp->mp_seg[S].mem_vir << CLICK_SHIFT; - vsp += (vir_bytes) rmp->mp_seg[S].mem_len << CLICK_SHIFT; - vsp -= stk_bytes; - patch_ptr(mbuf, vsp); - src = (vir_bytes) mbuf; - r = sys_datacopy(PM_PROC_NR, (vir_bytes) src, - who_e, (vir_bytes) vsp, (phys_bytes)stk_bytes); - if (r != OK) panic(__FILE__,"do_exec stack copy err on", who_e); - - /* Read in text and data segments. */ - if (sh_mp != NULL) { - lseek(fd, (off_t) text_bytes, SEEK_CUR); /* shared: skip text */ - } else { - rw_seg(0, fd, who_e, T, text_bytes); - } - rw_seg(0, fd, who_e, D, data_bytes); + /* Do not reply */ + return SUSPEND; +} - close(fd); /* don't need exec file any more */ - /* Take care of setuid/setgid bits. */ - if ((rmp->mp_flags & TRACED) == 0) { /* suppress if tracing */ - if (s_buf[0].st_mode & I_SET_UID_BIT) { - rmp->mp_effuid = s_buf[0].st_uid; - tell_fs(SETUID, who_e, (int)rmp->mp_realuid, (int)rmp->mp_effuid); +/*===========================================================================* + * exec_newmem * + *===========================================================================*/ +PUBLIC int exec_newmem() +{ + int r, proc_e, proc_n, allow_setuid; + vir_bytes stack_top; + vir_clicks tc, dc, sc, totc, dvir, s_vir; + struct mproc *rmp, *sh_mp; + char *ptr; + struct exec_newmem args; + + if (who_e != FS_PROC_NR && who_e != RS_PROC_NR) + return EPERM; + + proc_e= m_in.EXC_NM_PROC; + if (pm_isokendpt(proc_e, &proc_n) != OK) + { + panic(__FILE__, "exec_newmem: got bad endpoint", + proc_e); } - if (s_buf[0].st_mode & I_SET_GID_BIT) { - rmp->mp_effgid = s_buf[0].st_gid; - tell_fs(SETGID,who_e, (int)rmp->mp_realgid, (int)rmp->mp_effgid); + rmp= &mproc[proc_n]; + + ptr= m_in.EXC_NM_PTR; + r= sys_datacopy(who_e, (vir_bytes)ptr, + SELF, (vir_bytes)&args, sizeof(args)); + if (r != OK) + panic(__FILE__, "exec_newmem: sys_datacopy failed", r); + + /* Check to see if segment sizes are feasible. 
*/ + tc = ((unsigned long) args.text_bytes + CLICK_SIZE - 1) >> CLICK_SHIFT; + dc = (args.data_bytes+args.bss_bytes + CLICK_SIZE - 1) >> CLICK_SHIFT; + totc = (args.tot_bytes + CLICK_SIZE - 1) >> CLICK_SHIFT; + sc = (args.args_bytes + CLICK_SIZE - 1) >> CLICK_SHIFT; + if (dc >= totc) return(ENOEXEC); /* stack must be at least 1 click */ + + dvir = (args.sep_id ? 0 : tc); + s_vir = dvir + (totc - sc); +#if (CHIP == INTEL && _WORD_SIZE == 2) + r = size_ok(*ft, tc, dc, sc, dvir, s_vir); +#else + r = (dvir + dc > s_vir) ? ENOMEM : OK; +#endif + if (r != OK) + return r; + + /* Can the process' text be shared with that of one already running? */ + sh_mp = find_share(rmp, args.st_ino, args.st_dev, args.st_ctime); + + /* Allocate new memory and release old memory. Fix map and tell + * kernel. + */ + r = new_mem(rmp, sh_mp, args.text_bytes, args.data_bytes, + args.bss_bytes, args.args_bytes, args.tot_bytes); + if (r != OK) return(r); + + rmp->mp_flags |= PARTIAL_EXEC; /* Kill process if something goes + * wrong after this point. + */ + + /* Save file identification to allow it to be shared. */ + rmp->mp_ino = args.st_ino; + rmp->mp_dev = args.st_dev; + rmp->mp_ctime = args.st_ctime; + + stack_top= ((vir_bytes)rmp->mp_seg[S].mem_vir << CLICK_SHIFT) + + ((vir_bytes)rmp->mp_seg[S].mem_len << CLICK_SHIFT); + + /* Save offset to initial argc (for ps) */ + rmp->mp_procargs = stack_top - args.args_bytes; + + /* set/clear separate I&D flag */ + if (args.sep_id) + rmp->mp_flags |= SEPARATE; + else + rmp->mp_flags &= ~SEPARATE; + + allow_setuid= 0; /* Do not allow setuid execution */ + if ((rmp->mp_flags & TRACED) == 0) { + /* Okay, setuid execution is allowed */ + allow_setuid= 1; + rmp->mp_effuid = args.new_uid; + rmp->mp_effgid = args.new_gid; } - } - /* Save offset to initial argc (for ps) */ - rmp->mp_procargs = vsp; + /* System will save command line for debugging, ps(1) output, etc. 
*/ + strncpy(rmp->mp_name, args.progname, PROC_NAME_LEN-1); + rmp->mp_name[PROC_NAME_LEN-1] = '\0'; - /* Fix 'mproc' fields, tell kernel that exec is done, reset caught sigs. */ - for (sn = 1; sn <= _NSIG; sn++) { - if (sigismember(&rmp->mp_catch, sn)) { - sigdelset(&rmp->mp_catch, sn); - rmp->mp_sigact[sn].sa_handler = SIG_DFL; - sigemptyset(&rmp->mp_sigact[sn].sa_mask); - } - } + mp->mp_reply.reply_res2= stack_top; + mp->mp_reply.reply_res3= 0; + if (!sh_mp) /* Load text if sh_mp = NULL */ + mp->mp_reply.reply_res3 |= EXC_NM_RF_LOAD_TEXT; + if (allow_setuid) + mp->mp_reply.reply_res3 |= EXC_NM_RF_ALLOW_SETUID; - rmp->mp_flags &= ~SEPARATE; /* turn off SEPARATE bit */ - rmp->mp_flags |= ft; /* turn it on for separate I & D files */ - new_sp = (char *) vsp; + return OK; +} - tell_fs(EXEC, who_e, 0, 0); /* allow FS to handle FD_CLOEXEC files */ - /* System will save command line for debugging, ps(1) output, etc. */ - basename = strrchr(name, '/'); - if (basename == NULL) basename = name; else basename++; - strncpy(rmp->mp_name, basename, PROC_NAME_LEN-1); - rmp->mp_name[PROC_NAME_LEN] = '\0'; - if((r2=sys_exec(who_e, new_sp, basename, pc)) != OK) { - panic(__FILE__,"sys_exec failed", r2); - } +/*===========================================================================* + * do_execrestart * + *===========================================================================*/ +PUBLIC int do_execrestart() +{ + int proc_e, proc_n, result; + struct mproc *rmp; + + if (who_e != RS_PROC_NR) + return EPERM; + + proc_e= m_in.EXC_RS_PROC; + if (pm_isokendpt(proc_e, &proc_n) != OK) + { + panic(__FILE__, "do_execrestart: got bad endpoint", + proc_e); + } + rmp= &mproc[proc_n]; + result= m_in.EXC_RS_RESULT; - /* Cause a signal if this process is traced. 
*/ - if (rmp->mp_flags & TRACED) check_sig(rmp->mp_pid, SIGTRAP); + exec_restart(rmp, result); - return(SUSPEND); /* no reply, new program just runs */ + return OK; } + /*===========================================================================* - * read_header * + * exec_restart * *===========================================================================*/ -PRIVATE int read_header(fd, ft, text_bytes, data_bytes, bss_bytes, - tot_bytes, sym_bytes, sc, pc) -int fd; /* file descriptor for reading exec file */ -int *ft; /* place to return ft number */ -vir_bytes *text_bytes; /* place to return text size */ -vir_bytes *data_bytes; /* place to return initialized data size */ -vir_bytes *bss_bytes; /* place to return bss size */ -phys_bytes *tot_bytes; /* place to return total size */ -long *sym_bytes; /* place to return symbol table size */ -vir_clicks sc; /* stack size in clicks */ -vir_bytes *pc; /* program entry point (initial PC) */ +PUBLIC void exec_restart(rmp, result) +struct mproc *rmp; +int result; { -/* Read the header and extract the text, data, bss and total sizes from it. */ - - int m, ct; - vir_clicks tc, dc, s_vir, dvir; - phys_clicks totc; - struct exec hdr; /* a.out header is read in here */ - - /* Read the header and check the magic number. The standard MINIX header - * is defined in . It consists of 8 chars followed by 6 longs. - * Then come 4 more longs that are not used here. - * Byte 0: magic number 0x01 - * Byte 1: magic number 0x03 - * Byte 2: normal = 0x10 (not checked, 0 is OK), separate I/D = 0x20 - * Byte 3: CPU type, Intel 16 bit = 0x04, Intel 32 bit = 0x10, - * Motorola = 0x0B, Sun SPARC = 0x17 - * Byte 4: Header length = 0x20 - * Bytes 5-7 are not used. 
- * - * Now come the 6 longs - * Bytes 8-11: size of text segments in bytes - * Bytes 12-15: size of initialized data segment in bytes - * Bytes 16-19: size of bss in bytes - * Bytes 20-23: program entry point - * Bytes 24-27: total memory allocated to program (text, data + stack) - * Bytes 28-31: size of symbol table in bytes - * The longs are represented in a machine dependent order, - * little-endian on the 8088, big-endian on the 68000. - * The header is followed directly by the text and data segments, and the - * symbol table (if any). The sizes are given in the header. Only the - * text and data segments are copied into memory by exec. The header is - * used here only. The symbol table is for the benefit of a debugger and - * is ignored here. - */ + int r, sn; + vir_bytes pc; + char *new_sp; + + if (result != OK) + { + if (rmp->mp_flags & PARTIAL_EXEC) + { + printf("partial exec; killing process\n"); + + /* Use SIGILL signal that something went wrong */ + rmp->mp_sigstatus = SIGILL; + pm_exit(rmp, 0, FALSE /*!for_trace*/); + return; + } + setreply(rmp-mproc, result); + return; + } - if ((m= read(fd, &hdr, A_MINHDR)) < 2) return(ENOEXEC); + rmp->mp_flags &= ~PARTIAL_EXEC; + + /* Fix 'mproc' fields, tell kernel that exec is done, reset caught + * sigs. + */ + for (sn = 1; sn <= _NSIG; sn++) { + if (sigismember(&rmp->mp_catch, sn)) { + sigdelset(&rmp->mp_catch, sn); + rmp->mp_sigact[sn].sa_handler = SIG_DFL; + sigemptyset(&rmp->mp_sigact[sn].sa_mask); + } + } - /* Interpreted script? */ - if (((char *) &hdr)[0] == '#' && ((char *) &hdr)[1] == '!') return(ESCRIPT); - if (m != A_MINHDR) return(ENOEXEC); + new_sp= (char *)rmp->mp_procargs; + pc= 0; /* for now */ + r= sys_exec(rmp->mp_endpoint, new_sp, rmp->mp_name, pc); + if (r != OK) panic(__FILE__, "sys_exec failed", r); - /* Check magic number, cpu type, and flags. 
*/ - if (BADMAG(hdr)) return(ENOEXEC); -#if (CHIP == INTEL && _WORD_SIZE == 2) - if (hdr.a_cpu != A_I8086) return(ENOEXEC); -#endif -#if (CHIP == INTEL && _WORD_SIZE == 4) - if (hdr.a_cpu != A_I80386) return(ENOEXEC); -#endif - if ((hdr.a_flags & ~(A_NSYM | A_EXEC | A_SEP)) != 0) return(ENOEXEC); - - *ft = ( (hdr.a_flags & A_SEP) ? SEPARATE : 0); /* separate I & D or not */ - - /* Get text and data sizes. */ - *text_bytes = (vir_bytes) hdr.a_text; /* text size in bytes */ - *data_bytes = (vir_bytes) hdr.a_data; /* data size in bytes */ - *bss_bytes = (vir_bytes) hdr.a_bss; /* bss size in bytes */ - *tot_bytes = hdr.a_total; /* total bytes to allocate for prog */ - *sym_bytes = hdr.a_syms; /* symbol table size in bytes */ - if (*tot_bytes == 0) return(ENOEXEC); - - if (*ft != SEPARATE) { - /* If I & D space is not separated, it is all considered data. Text=0*/ - *data_bytes += *text_bytes; - *text_bytes = 0; + /* Cause a signal if this process is traced. */ + if (rmp->mp_flags & TRACED) check_sig(rmp->mp_pid, SIGTRAP); +} + +/*===========================================================================* + * find_share * + *===========================================================================*/ +PUBLIC struct mproc *find_share(mp_ign, ino, dev, ctime) +struct mproc *mp_ign; /* process that should not be looked at */ +ino_t ino; /* parameters that uniquely identify a file */ +dev_t dev; +time_t ctime; +{ +/* Look for a process that is the file in execution. Don't + * accidentally "find" mp_ign, because it is the process on whose behalf this + * call is made. 
+ */ + struct mproc *sh_mp; + for (sh_mp = &mproc[0]; sh_mp < &mproc[NR_PROCS]; sh_mp++) { + + if (!(sh_mp->mp_flags & SEPARATE)) continue; + if (sh_mp == mp_ign) continue; + if (sh_mp->mp_ino != ino) continue; + if (sh_mp->mp_dev != dev) continue; + if (sh_mp->mp_ctime != ctime) continue; + return sh_mp; } - *pc = hdr.a_entry; /* initial address to start execution */ - - /* Check to see if segment sizes are feasible. */ - tc = ((unsigned long) *text_bytes + CLICK_SIZE - 1) >> CLICK_SHIFT; - dc = (*data_bytes + *bss_bytes + CLICK_SIZE - 1) >> CLICK_SHIFT; - totc = (*tot_bytes + CLICK_SIZE - 1) >> CLICK_SHIFT; - if (dc >= totc) return(ENOEXEC); /* stack must be at least 1 click */ - dvir = (*ft == SEPARATE ? 0 : tc); - s_vir = dvir + (totc - sc); -#if (CHIP == INTEL && _WORD_SIZE == 2) - m = size_ok(*ft, tc, dc, sc, dvir, s_vir); -#else - m = (dvir + dc > s_vir) ? ENOMEM : OK; -#endif - ct = hdr.a_hdrlen & BYTE; /* header length */ - if (ct > A_MINHDR) lseek(fd, (off_t) ct, SEEK_SET); /* skip unused hdr */ - return(m); + return(NULL); } /*===========================================================================* * new_mem * *===========================================================================*/ -PRIVATE int new_mem(sh_mp, text_bytes, data_bytes, +PRIVATE int new_mem(rmp, sh_mp, text_bytes, data_bytes, bss_bytes,stk_bytes,tot_bytes) +struct mproc *rmp; /* process to get a new memory map */ struct mproc *sh_mp; /* text can be shared with this process */ vir_bytes text_bytes; /* text segment size in bytes */ vir_bytes data_bytes; /* size of initialized data in bytes */ @@ -303,7 +282,6 @@ phys_bytes tot_bytes; /* total memory to allocate, including gap */ * the new map to the kernel. Zero the new core image's bss, gap and stack. 
*/ - register struct mproc *rmp = mp; vir_clicks text_clicks, data_clicks, gap_clicks, stack_clicks, tot_clicks; phys_clicks new_base; phys_bytes bytes, base, bss_offset; @@ -333,8 +311,6 @@ phys_bytes tot_bytes; /* total memory to allocate, including gap */ if (new_base == NO_MEM) return(ENOMEM); /* We've got memory for the new core image. Release the old one. */ - rmp = mp; - if (find_share(rmp, rmp->mp_ino, rmp->mp_dev, rmp->mp_ctime) == NULL) { /* No other process shares the text segment, so free it. */ free_mem(rmp->mp_seg[T].mem_phys, rmp->mp_seg[T].mem_len); @@ -354,6 +330,16 @@ phys_bytes tot_bytes; /* total memory to allocate, including gap */ rmp->mp_seg[T].mem_phys = new_base; rmp->mp_seg[T].mem_vir = 0; rmp->mp_seg[T].mem_len = text_clicks; + + if (text_clicks > 0) + { + /* Zero the last click of the text segment. Otherwise the + * part of that click may remain unchanged. + */ + base = (phys_bytes)(new_base+text_clicks-1) << CLICK_SHIFT; + if ((s= sys_memset(0, base, CLICK_SIZE)) != OK) + panic(__FILE__, "new_mem: sys_memset failed", s); + } } rmp->mp_seg[D].mem_phys = new_base + text_clicks; rmp->mp_seg[D].mem_vir = 0; @@ -369,7 +355,7 @@ phys_bytes tot_bytes; /* total memory to allocate, including gap */ + rmp->mp_seg[D].mem_len + gap_clicks; #endif - if((r2=sys_newmap(who_e, rmp->mp_seg)) != OK) { + if((r2=sys_newmap(rmp->mp_endpoint, rmp->mp_seg)) != OK) { /* report new map to the kernel */ panic(__FILE__,"sys_newmap failed", r2); } @@ -390,215 +376,3 @@ phys_bytes tot_bytes; /* total memory to allocate, including gap */ return(OK); } - -/*===========================================================================* - * patch_ptr * - *===========================================================================*/ -PRIVATE void patch_ptr(stack, base) -char stack[ARG_MAX]; /* pointer to stack image within PM */ -vir_bytes base; /* virtual address of stack base inside user */ -{ -/* When doing an exec(name, argv, envp) call, the user builds up a stack - * 
image with arg and env pointers relative to the start of the stack. Now - * these pointers must be relocated, since the stack is not positioned at - * address 0 in the user's address space. - */ - - char **ap, flag; - vir_bytes v; - - flag = 0; /* counts number of 0-pointers seen */ - ap = (char **) stack; /* points initially to 'nargs' */ - ap++; /* now points to argv[0] */ - while (flag < 2) { - if (ap >= (char **) &stack[ARG_MAX]) return; /* too bad */ - if (*ap != NULL) { - v = (vir_bytes) *ap; /* v is relative pointer */ - v += base; /* relocate it */ - *ap = (char *) v; /* put it back */ - } else { - flag++; - } - ap++; - } -} - -/*===========================================================================* - * insert_arg * - *===========================================================================*/ -PRIVATE int insert_arg(stack, stk_bytes, arg, replace) -char stack[ARG_MAX]; /* pointer to stack image within PM */ -vir_bytes *stk_bytes; /* size of initial stack */ -char *arg; /* argument to prepend/replace as new argv[0] */ -int replace; -{ -/* Patch the stack so that arg will become argv[0]. Be careful, the stack may - * be filled with garbage, although it normally looks like this: - * nargs argv[0] ... argv[nargs-1] NULL envp[0] ... NULL - * followed by the strings "pointed" to by the argv[i] and the envp[i]. The - * pointers are really offsets from the start of stack. - * Return true iff the operation succeeded. - */ - int offset, a0, a1, old_bytes = *stk_bytes; - - /* Prepending arg adds at least one string and a zero byte. */ - offset = strlen(arg) + 1; - - a0 = (int) ((char **) stack)[1]; /* argv[0] */ - if (a0 < 4 * PTRSIZE || a0 >= old_bytes) return(FALSE); - - a1 = a0; /* a1 will point to the strings to be moved */ - if (replace) { - /* Move a1 to the end of argv[0][] (argv[1] if nargs > 1). 
*/ - do { - if (a1 == old_bytes) return(FALSE); - --offset; - } while (stack[a1++] != 0); - } else { - offset += PTRSIZE; /* new argv[0] needs new pointer in argv[] */ - a0 += PTRSIZE; /* location of new argv[0][]. */ - } - - /* stack will grow by offset bytes (or shrink by -offset bytes) */ - if ((*stk_bytes += offset) > ARG_MAX) return(FALSE); - - /* Reposition the strings by offset bytes */ - memmove(stack + a1 + offset, stack + a1, old_bytes - a1); - - strcpy(stack + a0, arg); /* Put arg in the new space. */ - - if (!replace) { - /* Make space for a new argv[0]. */ - memmove(stack + 2 * PTRSIZE, stack + 1 * PTRSIZE, a0 - 2 * PTRSIZE); - - ((char **) stack)[0]++; /* nargs++; */ - } - /* Now patch up argv[] and envp[] by offset. */ - patch_ptr(stack, (vir_bytes) offset); - ((char **) stack)[1] = (char *) a0; /* set argv[0] correctly */ - return(TRUE); -} - -/*===========================================================================* - * patch_stack * - *===========================================================================*/ -PRIVATE char *patch_stack(fd, stack, stk_bytes, script) -int fd; /* file descriptor to open script file */ -char stack[ARG_MAX]; /* pointer to stack image within PM */ -vir_bytes *stk_bytes; /* size of initial stack */ -char *script; /* name of script to interpret */ -{ -/* Patch the argument vector to include the path name of the script to be - * interpreted, and all strings on the #! line. Returns the path name of - * the interpreter. - */ - char *sp, *interp = NULL; - int n; - enum { INSERT=FALSE, REPLACE=TRUE }; - - /* Make script[] the new argv[0]. */ - if (!insert_arg(stack, stk_bytes, script, REPLACE)) return(NULL); - - if (lseek(fd, 2L, 0) == -1 /* just behind the #! */ - || (n= read(fd, script, PATH_MAX)) < 0 /* read line one */ - || (sp= memchr(script, '\n', n)) == NULL) /* must be a proper line */ - return(NULL); - - /* Move sp backwards through script[], prepending each string to stack. 
*/ - for (;;) { - /* skip spaces behind argument. */ - while (sp > script && (*--sp == ' ' || *sp == '\t')) {} - if (sp == script) break; - - sp[1] = 0; - /* Move to the start of the argument. */ - while (sp > script && sp[-1] != ' ' && sp[-1] != '\t') --sp; - - interp = sp; - if (!insert_arg(stack, stk_bytes, sp, INSERT)) return(NULL); - } - - /* Round *stk_bytes up to the size of a pointer for alignment contraints. */ - *stk_bytes= ((*stk_bytes + PTRSIZE - 1) / PTRSIZE) * PTRSIZE; - - close(fd); - return(interp); -} - -/*===========================================================================* - * rw_seg * - *===========================================================================*/ -PUBLIC void rw_seg(rw, fd, proc_e, seg, seg_bytes0) -int rw; /* 0 = read, 1 = write */ -int fd; /* file descriptor to read from / write to */ -int proc_e; /* process number (endpoint) */ -int seg; /* T, D, or S */ -phys_bytes seg_bytes0; /* how much is to be transferred? */ -{ -/* Transfer text or data from/to a file and copy to/from a process segment. - * This procedure is a little bit tricky. The logical way to transfer a - * segment would be block by block and copying each block to/from the user - * space one at a time. This is too slow, so we do something dirty here, - * namely send the user space and virtual address to the file system in the - * upper 10 bits of the file descriptor, and pass it the user virtual address - * instead of a PM address. The file system extracts these parameters when - * gets a read or write call from the process manager, which is the only - * process that is permitted to use this trick. The file system then copies - * the whole segment directly to/from user space, bypassing PM completely. - * - * The byte count on read is usually smaller than the segment count, because - * a segment is padded out to a click multiple, and the data segment is only - * partially initialized. 
- */ - - int bytes, r, proc_n; - char *ubuf_ptr; - struct mem_map *sp; - phys_bytes seg_bytes = seg_bytes0; - - if(pm_isokendpt(proc_e, &proc_n) != OK || proc_n < 0) - return; - - sp = &mproc[proc_n].mp_seg[seg]; - - ubuf_ptr = (char *) ((vir_bytes) sp->mem_vir << CLICK_SHIFT); - - while (seg_bytes != 0) { -#define PM_CHUNK_SIZE 8192 - bytes = MIN((INT_MAX / PM_CHUNK_SIZE) * PM_CHUNK_SIZE, seg_bytes); - if(!rw) { - r = _read_pm(fd, ubuf_ptr, bytes, seg, proc_e); - } else { - r = _write_pm(fd, ubuf_ptr, bytes, seg, proc_e); - } - if (r != bytes) break; - ubuf_ptr += bytes; - seg_bytes -= bytes; - } -} - -/*===========================================================================* - * find_share * - *===========================================================================*/ -PUBLIC struct mproc *find_share(mp_ign, ino, dev, ctime) -struct mproc *mp_ign; /* process that should not be looked at */ -ino_t ino; /* parameters that uniquely identify a file */ -dev_t dev; -time_t ctime; -{ -/* Look for a process that is the file in execution. Don't - * accidentally "find" mp_ign, because it is the process on whose behalf this - * call is made. 
- */ - struct mproc *sh_mp; - for (sh_mp = &mproc[0]; sh_mp < &mproc[NR_PROCS]; sh_mp++) { - - if (!(sh_mp->mp_flags & SEPARATE)) continue; - if (sh_mp == mp_ign) continue; - if (sh_mp->mp_ino != ino) continue; - if (sh_mp->mp_dev != dev) continue; - if (sh_mp->mp_ctime != ctime) continue; - return sh_mp; - } - return(NULL); -} diff --git a/servers/pm/forkexit.c b/servers/pm/forkexit.c index a0a4cb62d..ac5f3d137 100644 --- a/servers/pm/forkexit.c +++ b/servers/pm/forkexit.c @@ -11,6 +11,7 @@ * do_pm_exit: perform the EXIT system call (by calling pm_exit()) * pm_exit: actually do the exiting * do_wait: perform the WAITPID or WAIT system call + * tell_parent: tell parent about the death of a child */ #include "pm.h" @@ -103,21 +104,117 @@ PUBLIC int do_fork() rmc->mp_pid = new_pid; /* assign pid to child */ /* Tell kernel and file system about the (now successful) FORK. */ - if((r=sys_fork(who_e, child_nr, &rmc->mp_endpoint)) != OK) { + if((r=sys_fork(who_e, child_nr, &rmc->mp_endpoint, rmc->mp_seg)) != OK) { panic(__FILE__,"do_fork can't sys_fork", r); } - tell_fs(FORK, who_e, rmc->mp_endpoint, rmc->mp_pid); - /* Report child's memory map to kernel. */ - if((r=sys_newmap(rmc->mp_endpoint, rmc->mp_seg)) != OK) { - panic(__FILE__,"do_fork can't sys_newmap", r); + if (rmc->mp_fs_call != PM_IDLE) + panic("pm", "do_fork: not idle", rmc->mp_fs_call); + rmc->mp_fs_call= PM_FORK; + r= notify(FS_PROC_NR); + if (r != OK) panic("pm", "do_fork: unable to notify FS", r); + + /* Do not reply until FS is ready to process the fork + * request + */ + return SUSPEND; +} + +/*===========================================================================* + * do_fork_nb * + *===========================================================================*/ +PUBLIC int do_fork_nb() +{ +/* The process pointed to by 'mp' has forked. Create a child process. 
*/ + register struct mproc *rmp; /* pointer to parent */ + register struct mproc *rmc; /* pointer to child */ + int child_nr, s; + phys_clicks prog_clicks, child_base; + phys_bytes prog_bytes, parent_abs, child_abs; /* Intel only */ + pid_t new_pid; + static int next_child; + int n = 0, r; + + /* Only system processes are allowed to use fork_nb */ + if (!(mp->mp_flags & PRIV_PROC)) + return EPERM; + + /* If tables might fill up during FORK, don't even start since recovery half + * way through is such a nuisance. + */ + rmp = mp; + if ((procs_in_use == NR_PROCS) || + (procs_in_use >= NR_PROCS-LAST_FEW && rmp->mp_effuid != 0)) + { + printf("PM: warning, process table is full!\n"); + return(EAGAIN); + } + + /* Determine how much memory to allocate. Only the data and stack need to + * be copied, because the text segment is either shared or of zero length. + */ + prog_clicks = (phys_clicks) rmp->mp_seg[S].mem_len; + prog_clicks += (rmp->mp_seg[S].mem_vir - rmp->mp_seg[D].mem_vir); + prog_bytes = (phys_bytes) prog_clicks << CLICK_SHIFT; + if ( (child_base = alloc_mem(prog_clicks)) == NO_MEM) return(ENOMEM); + + /* Create a copy of the parent's core image for the child. */ + child_abs = (phys_bytes) child_base << CLICK_SHIFT; + parent_abs = (phys_bytes) rmp->mp_seg[D].mem_phys << CLICK_SHIFT; + s = sys_abscopy(parent_abs, child_abs, prog_bytes); + if (s < 0) panic(__FILE__,"do_fork can't copy", s); + + /* Find a slot in 'mproc' for the child process. A slot must exist. */ + do { + next_child = (next_child+1) % NR_PROCS; + n++; + } while((mproc[next_child].mp_flags & IN_USE) && n <= NR_PROCS); + if(n > NR_PROCS) + panic(__FILE__,"do_fork can't find child slot", NO_NUM); + if(next_child < 0 || next_child >= NR_PROCS + || (mproc[next_child].mp_flags & IN_USE)) + panic(__FILE__,"do_fork finds wrong child slot", next_child); + + rmc = &mproc[next_child]; + /* Set up the child and its memory map; copy its 'mproc' slot from parent. 
*/ + child_nr = (int)(rmc - mproc); /* slot number of the child */ + procs_in_use++; + *rmc = *rmp; /* copy parent's process slot to child's */ + rmc->mp_parent = who_p; /* record child's parent */ + /* inherit only these flags */ + rmc->mp_flags &= (IN_USE|SEPARATE|PRIV_PROC|DONT_SWAP); + rmc->mp_child_utime = 0; /* reset administration */ + rmc->mp_child_stime = 0; /* reset administration */ + + /* A separate I&D child keeps the parents text segment. The data and stack + * segments must refer to the new copy. + */ + if (!(rmc->mp_flags & SEPARATE)) rmc->mp_seg[T].mem_phys = child_base; + rmc->mp_seg[D].mem_phys = child_base; + rmc->mp_seg[S].mem_phys = rmc->mp_seg[D].mem_phys + + (rmp->mp_seg[S].mem_vir - rmp->mp_seg[D].mem_vir); + rmc->mp_exitstatus = 0; + rmc->mp_sigstatus = 0; + + /* Find a free pid for the child and put it in the table. */ + new_pid = get_free_pid(); + rmc->mp_pid = new_pid; /* assign pid to child */ + + /* Tell kernel and file system about the (now successful) FORK. */ + if((r=sys_fork(who_e, child_nr, &rmc->mp_endpoint, rmc->mp_seg)) != OK) { + panic(__FILE__,"do_fork can't sys_fork", r); } - /* Reply to child to wake it up. */ - setreply(child_nr, 0); /* only parent gets details */ - rmp->mp_reply.endpt = rmc->mp_endpoint; /* child's process number */ + if (rmc->mp_fs_call != PM_IDLE) + panic("pm", "do_fork: not idle", rmc->mp_fs_call); + rmc->mp_fs_call= PM_FORK_NB; + r= notify(FS_PROC_NR); + if (r != OK) panic("pm", "do_fork: unable to notify FS", r); + + /* Wakeup the newly created process */ + setreply(rmc-mproc, OK); - return(new_pid); /* child's pid */ + return rmc->mp_pid; } /*===========================================================================* @@ -128,16 +225,17 @@ PUBLIC int do_pm_exit() /* Perform the exit(status) system call. The real work is done by pm_exit(), * which is also called when a process is killed by a signal. 
*/ - pm_exit(mp, m_in.status); + pm_exit(mp, m_in.status, FALSE /*!for_trace*/); return(SUSPEND); /* can't communicate from beyond the grave */ } /*===========================================================================* * pm_exit * *===========================================================================*/ -PUBLIC void pm_exit(rmp, exit_status) +PUBLIC void pm_exit(rmp, exit_status, for_trace) register struct mproc *rmp; /* pointer to the process to be terminated */ int exit_status; /* the process' exit status (for parent) */ +int for_trace; { /* A process is done. Release most of the process' possessions. If its * parent is waiting, release the rest, else keep the process slot and @@ -173,38 +271,51 @@ int exit_status; /* the process' exit status (for parent) */ * such as copying to/ from the exiting process, before it is gone. */ sys_nice(proc_nr_e, PRIO_STOP); /* stop the process */ + + if (proc_nr_e == INIT_PROC_NR) + { + printf("PM: INIT died\n"); + return; + } + else if(proc_nr_e != FS_PROC_NR) /* if it is not FS that is exiting.. */ - tell_fs(EXIT, proc_nr_e, 0, 0); /* tell FS to free the slot */ + { + /* Tell FS about the exiting process. */ + if (rmp->mp_fs_call != PM_IDLE) + panic(__FILE__, "pm_exit: not idle", rmp->mp_fs_call); + rmp->mp_fs_call= (for_trace ? PM_EXIT_TR : PM_EXIT); + r= notify(FS_PROC_NR); + if (r != OK) panic(__FILE__, "pm_exit: unable to notify FS", r); + + if (rmp->mp_flags & PRIV_PROC) + { + /* destroy system processes without waiting for FS */ + if((r= sys_exit(rmp->mp_endpoint)) != OK) + panic(__FILE__, "pm_exit: sys_exit failed", r); + } + } else + { printf("PM: FS died\n"); - if((r=sys_exit(proc_nr_e)) != OK) /* destroy the process */ - panic(__FILE__,"pm_exit: sys_exit failed", r); + return; + } /* Pending reply messages for the dead process cannot be delivered. */ rmp->mp_flags &= ~REPLY; - - /* Release the memory occupied by the child. 
*/ - if (find_share(rmp, rmp->mp_ino, rmp->mp_dev, rmp->mp_ctime) == NULL) { - /* No other process shares the text segment, so free it. */ - free_mem(rmp->mp_seg[T].mem_phys, rmp->mp_seg[T].mem_len); - } - /* Free the data and stack segments. */ - free_mem(rmp->mp_seg[D].mem_phys, - rmp->mp_seg[S].mem_vir - + rmp->mp_seg[S].mem_len - rmp->mp_seg[D].mem_vir); - /* The process slot can only be freed if the parent has done a WAIT. */ + /* Keep the process around until FS is finished with it. */ + rmp->mp_exitstatus = (char) exit_status; - pidarg = p_mp->mp_wpid; /* who's being waited for? */ parent_waiting = p_mp->mp_flags & WAITING; right_child = /* child meets one of the 3 tests? */ (pidarg == -1 || pidarg == rmp->mp_pid || -pidarg == rmp->mp_procgrp); if (parent_waiting && right_child) { - cleanup(rmp); /* tell parent and release child slot */ + tell_parent(rmp); /* tell parent */ } else { - rmp->mp_flags = IN_USE|ZOMBIE; /* parent not waiting, zombify child */ + rmp->mp_flags &= (IN_USE|PRIV_PROC); + rmp->mp_flags |= ZOMBIE; /* parent not waiting, zombify child */ sig_proc(p_mp, SIGCHLD); /* send parent a "child died" signal */ } @@ -214,7 +325,8 @@ int exit_status; /* the process' exit status (for parent) */ /* 'rmp' now points to a child to be disinherited. */ rmp->mp_parent = INIT_PROC_NR; parent_waiting = mproc[INIT_PROC_NR].mp_flags & WAITING; - if (parent_waiting && (rmp->mp_flags & ZOMBIE)) cleanup(rmp); + if (parent_waiting && (rmp->mp_flags & ZOMBIE)) + cleanup(rmp); } } @@ -259,7 +371,9 @@ PUBLIC int do_waitpid() children++; /* this child is acceptable */ if (rp->mp_flags & ZOMBIE) { /* This child meets the pid test and has exited. 
*/ - cleanup(rp); /* this child has already exited */ + tell_parent(rp); /* this child has already exited */ + if (rp->mp_fs_call == PM_IDLE) + real_cleanup(rp); return(SUSPEND); } if ((rp->mp_flags & STOPPED) && rp->mp_sigstatus) { @@ -293,20 +407,47 @@ register struct mproc *child; /* tells which process is exiting */ /* Finish off the exit of a process. The process has exited or been killed * by a signal, and its parent is waiting. */ - struct mproc *parent = &mproc[child->mp_parent]; - int exitstatus; + + if (child->mp_fs_call != PM_IDLE) + panic(__FILE__, "cleanup: not idle", child->mp_fs_call); + + tell_parent(child); + real_cleanup(child); + +} + +/*===========================================================================* + * tell_parent * + *===========================================================================*/ +PUBLIC void tell_parent(child) +register struct mproc *child; /* tells which process is exiting */ +{ + int exitstatus, mp_parent; + struct mproc *parent; + + mp_parent= child->mp_parent; + if (mp_parent <= 0) + panic(__FILE__, "tell_parent: bad value in mp_parent", mp_parent); + parent = &mproc[mp_parent]; /* Wake up the parent by sending the reply message. */ exitstatus = (child->mp_exitstatus << 8) | (child->mp_sigstatus & 0377); parent->mp_reply.reply_res2 = exitstatus; setreply(child->mp_parent, child->mp_pid); parent->mp_flags &= ~WAITING; /* parent no longer waiting */ + child->mp_flags &= ~ZOMBIE; /* avoid informing parent twice */ +} +/*===========================================================================* + * real_cleanup * + *===========================================================================*/ +PUBLIC void real_cleanup(rmp) +register struct mproc *rmp; /* tells which process is exiting */ +{ /* Release the process table entry and reinitialize some field. 
*/ - child->mp_pid = 0; - child->mp_flags = 0; - child->mp_child_utime = 0; - child->mp_child_stime = 0; + rmp->mp_pid = 0; + rmp->mp_flags = 0; + rmp->mp_child_utime = 0; + rmp->mp_child_stime = 0; procs_in_use--; } - diff --git a/servers/pm/getset.c b/servers/pm/getset.c index 5b0376ce6..dd0868aec 100644 --- a/servers/pm/getset.c +++ b/servers/pm/getset.c @@ -50,8 +50,21 @@ PUBLIC int do_getset() return(EPERM); if(call_nr == SETUID) rmp->mp_realuid = (uid_t) m_in.usr_id; rmp->mp_effuid = (uid_t) m_in.usr_id; - tell_fs(SETUID, who_e, rmp->mp_realuid, rmp->mp_effuid); - r = OK; + + if (rmp->mp_fs_call != PM_IDLE) + { + panic(__FILE__, "do_getset: not idle", + rmp->mp_fs_call); + } + rmp->mp_fs_call= PM_SETUID; + r= notify(FS_PROC_NR); + if (r != OK) + panic(__FILE__, "do_getset: unable to notify FS", r); + + /* Do not reply until FS is ready to process the setuid + * request + */ + r= SUSPEND; break; case SETEGID: @@ -61,15 +74,42 @@ PUBLIC int do_getset() return(EPERM); if(call_nr == SETGID) rmp->mp_realgid = (gid_t) m_in.grp_id; rmp->mp_effgid = (gid_t) m_in.grp_id; - tell_fs(SETGID, who_e, rmp->mp_realgid, rmp->mp_effgid); - r = OK; + + if (rmp->mp_fs_call != PM_IDLE) + { + panic(__FILE__, "do_getset: not idle", + rmp->mp_fs_call); + } + rmp->mp_fs_call= PM_SETGID; + r= notify(FS_PROC_NR); + if (r != OK) + panic(__FILE__, "do_getset: unable to notify FS", r); + + /* Do not reply until FS is ready to process the setgid + * request + */ + r= SUSPEND; break; case SETSID: if (rmp->mp_procgrp == rmp->mp_pid) return(EPERM); rmp->mp_procgrp = rmp->mp_pid; - tell_fs(SETSID, who_e, 0, 0); - /* fall through */ + + if (rmp->mp_fs_call != PM_IDLE) + { + panic(__FILE__, "do_getset: not idle", + rmp->mp_fs_call); + } + rmp->mp_fs_call= PM_SETSID; + r= notify(FS_PROC_NR); + if (r != OK) + panic(__FILE__, "do_getset: unable to notify FS", r); + + /* Do not reply until FS is ready to process the setsid + * request + */ + r= SUSPEND; + break; case GETPGRP: r = rmp->mp_procgrp; 
diff --git a/servers/pm/glo.h b/servers/pm/glo.h index 467d1a179..8c4272ecd 100644 --- a/servers/pm/glo.h +++ b/servers/pm/glo.h @@ -20,3 +20,11 @@ extern char core_name[]; /* file name where core images are produced */ EXTERN sigset_t core_sset; /* which signals cause core images */ EXTERN sigset_t ign_sset; /* which signals are by default ignored */ +EXTERN time_t boottime; /* time when the system was booted (for + * reporting to FS) + */ +EXTERN int report_reboot; /* During reboot to report to FS that we are + * rebooting. + */ +EXTERN int abort_flag; +EXTERN char monitor_code[256]; diff --git a/servers/pm/main.c b/servers/pm/main.c index 9d1c0de89..62aa53beb 100644 --- a/servers/pm/main.c +++ b/servers/pm/main.c @@ -34,6 +34,8 @@ FORWARD _PROTOTYPE( void get_mem_chunks, (struct memory *mem_chunks) ); FORWARD _PROTOTYPE( void patch_mem_chunks, (struct memory *mem_chunks, struct mem_map *map_ptr) ); FORWARD _PROTOTYPE( void do_x86_vm, (struct memory mem_chunks[NR_MEMS]) ); +FORWARD _PROTOTYPE( void send_work, (void) ); +FORWARD _PROTOTYPE( void handle_fs_reply, (message *m_ptr) ); #define click_to_round_k(n) \ ((unsigned) ((((unsigned long) (n) << CLICK_SHIFT) + 512) / 1024)) @@ -55,21 +57,51 @@ PUBLIC int main() get_work(); /* wait for an PM system call */ /* Check for system notifications first. Special cases. */ - if (call_nr == SYN_ALARM) { + switch(call_nr) + { + case SYN_ALARM: pm_expire_timers(m_in.NOTIFY_TIMESTAMP); result = SUSPEND; /* don't reply */ - } else if (call_nr == SYS_SIG) { /* signals pending */ + break; + case SYS_SIG: /* signals pending */ sigset = m_in.NOTIFY_ARG; if (sigismember(&sigset, SIGKSIG)) { (void) ksig_pending(); } result = SUSPEND; /* don't reply */ - } - /* Else, if the system call number is valid, perform the call. 
*/ - else if ((unsigned) call_nr >= NCALLS) { - result = ENOSYS; - } else { - result = (*call_vec[call_nr])(); + break; + case PM_GET_WORK: + if (who_e == FS_PROC_NR) + { + send_work(); + result= SUSPEND; /* don't reply */ + } + else + result= ENOSYS; + break; + case PM_EXIT_REPLY: + case PM_REBOOT_REPLY: + case PM_EXEC_REPLY: + case PM_CORE_REPLY: + case PM_EXIT_REPLY_TR: + if (who_e == FS_PROC_NR) + { + handle_fs_reply(&m_in); + result= SUSPEND; /* don't reply */ + } + else + result= ENOSYS; + break; + default: + /* Else, if the system call number is valid, perform the + * call. + */ + if ((unsigned) call_nr >= NCALLS) { + result = ENOSYS; + } else { + result = (*call_vec[call_nr])(); + } + break; } /* Send the results back to the user to indicate completion. */ @@ -182,6 +214,8 @@ PRIVATE void pm_init() /* Initialize process table, including timers. */ for (rmp=&mproc[0]; rmp<&mproc[NR_PROCS]; rmp++) { tmr_inittimer(&rmp->mp_timer); + + rmp->mp_fs_call= PM_IDLE; } /* Build the set of signals which cause core dumps, and the set of signals @@ -470,3 +504,379 @@ struct memory mem_chunks[NR_MEMS]; if (r != 0) printf("do_x86_vm: sys_vm_setbuf failed: %d\n", r); } + +/*=========================================================================* + * send_work * + *=========================================================================*/ +PRIVATE void send_work() +{ + int r, call; + struct mproc *rmp; + message m; + + m.m_type= PM_IDLE; + for (rmp= mproc; rmp < &mproc[NR_PROCS]; rmp++) + { + call= rmp->mp_fs_call; + if (call == PM_IDLE) + continue; + switch(call) + { + case PM_STIME: + m.m_type= call; + m.PM_STIME_TIME= boottime; + + /* FS does not reply */ + rmp->mp_fs_call= PM_IDLE; + + /* Wakeup the original caller */ + setreply(rmp-mproc, OK); + break; + + case PM_SETSID: + m.m_type= call; + m.PM_SETSID_PROC= rmp->mp_endpoint; + + /* FS does not reply */ + rmp->mp_fs_call= PM_IDLE; + + /* Wakeup the original caller */ + setreply(rmp-mproc, rmp->mp_procgrp); + break; 
+ + case PM_SETGID: + m.m_type= call; + m.PM_SETGID_PROC= rmp->mp_endpoint; + m.PM_SETGID_EGID= rmp->mp_effgid; + m.PM_SETGID_RGID= rmp->mp_realgid; + + /* FS does not reply */ + rmp->mp_fs_call= PM_IDLE; + + /* Wakeup the original caller */ + setreply(rmp-mproc, OK); + break; + + case PM_SETUID: + m.m_type= call; + m.PM_SETUID_PROC= rmp->mp_endpoint; + m.PM_SETUID_EGID= rmp->mp_effuid; + m.PM_SETUID_RGID= rmp->mp_realuid; + + /* FS does not reply */ + rmp->mp_fs_call= PM_IDLE; + + /* Wakeup the original caller */ + setreply(rmp-mproc, OK); + break; + + case PM_FORK: + { + int parent_e, parent_p; + struct mproc *parent_mp; + + parent_p = rmp->mp_parent; + parent_mp = &mproc[parent_p]; + + m.m_type= call; + m.PM_FORK_PPROC= parent_mp->mp_endpoint; + m.PM_FORK_CPROC= rmp->mp_endpoint; + m.PM_FORK_CPID= rmp->mp_pid; + + /* FS does not reply */ + rmp->mp_fs_call= PM_IDLE; + + /* Wakeup the newly created process */ + setreply(rmp-mproc, OK); + + /* Wakeup the parent */ + setreply(parent_mp-mproc, rmp->mp_pid); + break; + } + + case PM_EXIT: + case PM_EXIT_TR: + m.m_type= call; + m.PM_EXIT_PROC= rmp->mp_endpoint; + + /* Mark the process as busy */ + rmp->mp_fs_call= PM_BUSY; + + break; + + case PM_UNPAUSE: + case PM_UNPAUSE_TR: + m.m_type= call; + m.PM_UNPAUSE_PROC= rmp->mp_endpoint; + + /* FS does not reply */ + rmp->mp_fs_call= PM_IDLE; + + if (call == PM_UNPAUSE) + { + /* Ask the kernel to deliver the signal */ + r= sys_sigsend(rmp->mp_endpoint, + &rmp->mp_sigmsg); + if (r != OK) + panic(__FILE__,"sys_sigsend failed",r); + } + + break; + + case PM_EXEC: + m.m_type= call; + m.PM_EXEC_PROC= rmp->mp_endpoint; + m.PM_EXEC_PATH= rmp->mp_exec_path; + m.PM_EXEC_PATH_LEN= rmp->mp_exec_path_len; + m.PM_EXEC_FRAME= rmp->mp_exec_frame; + m.PM_EXEC_FRAME_LEN= rmp->mp_exec_frame_len; + + /* Mark the process as busy */ + rmp->mp_fs_call= PM_BUSY; + + break; + + case PM_FORK_NB: + { + int parent_e, parent_p; + struct mproc *parent_mp; + + parent_p = rmp->mp_parent; + parent_mp = 
&mproc[parent_p]; + + m.m_type= PM_FORK; + m.PM_FORK_PPROC= parent_mp->mp_endpoint; + m.PM_FORK_CPROC= rmp->mp_endpoint; + m.PM_FORK_CPID= rmp->mp_pid; + + /* FS does not reply */ + rmp->mp_fs_call= PM_IDLE; + + break; + } + + case PM_DUMPCORE: + m.m_type= call; + m.PM_CORE_PROC= rmp->mp_endpoint; + m.PM_CORE_SEGPTR= (char *)rmp->mp_seg; + + /* Mark the process as busy */ + rmp->mp_fs_call= PM_BUSY; + + break; + + default: + printf("send_work: should report call 0x%x to FS\n", + call); + break; + } + break; + } + if (m.m_type != PM_IDLE) + { + if (rmp->mp_fs_call == PM_IDLE && + (rmp->mp_flags & PM_SIG_PENDING)) + { + rmp->mp_flags &= ~PM_SIG_PENDING; + check_pending(rmp); + if (!(rmp->mp_flags & PM_SIG_PENDING)) + { + /* Allow the process to be scheduled */ + sys_nice(rmp->mp_endpoint, rmp->mp_nice); + } + } + } + else if (report_reboot) + { + m.m_type= PM_REBOOT; + report_reboot= FALSE; + } + r= send(FS_PROC_NR, &m); + if (r != OK) panic("pm", "send_work: send failed", r); + +} + +PRIVATE void handle_fs_reply(m_ptr) +message *m_ptr; +{ + int r, proc_e, proc_n; + struct mproc *rmp; + + switch(m_ptr->m_type) + { + case PM_EXIT_REPLY: + case PM_EXIT_REPLY_TR: + proc_e= m_ptr->PM_EXIT_PROC; + if (pm_isokendpt(proc_e, &proc_n) != OK) + { + panic(__FILE__, + "PM_EXIT_REPLY: got bad endpoint from FS", + proc_e); + } + rmp= &mproc[proc_n]; + + /* Call is finished */ + rmp->mp_fs_call= PM_IDLE; + + if (!(rmp->mp_flags & PRIV_PROC)) + { + /* destroy the (user) process */ + if((r=sys_exit(proc_e)) != OK) + { + panic(__FILE__, + "PM_EXIT_REPLY: sys_exit failed", r); + } + } + + /* Release the memory occupied by the child. */ + if (find_share(rmp, rmp->mp_ino, rmp->mp_dev, + rmp->mp_ctime) == NULL) { + /* No other process shares the text segment, + * so free it. + */ + free_mem(rmp->mp_seg[T].mem_phys, + rmp->mp_seg[T].mem_len); + } + /* Free the data and stack segments. 
*/ + free_mem(rmp->mp_seg[D].mem_phys, rmp->mp_seg[S].mem_vir + + rmp->mp_seg[S].mem_len - rmp->mp_seg[D].mem_vir); + + if (m_ptr->m_type == PM_EXIT_REPLY_TR && + rmp->mp_parent != INIT_PROC_NR) + { + /* Wake up the parent */ + mproc[rmp->mp_parent].mp_reply.reply_trace = 0; + setreply(rmp->mp_parent, OK); + } + + /* Clean up if the parent has collected the exit + * status + */ + if (!(rmp->mp_flags & ZOMBIE)) + real_cleanup(rmp); + + break; + + case PM_REBOOT_REPLY: + { + vir_bytes code_addr; + size_t code_size; + + /* Ask the kernel to abort. All system services, including + * the PM, will get a HARD_STOP notification. Await the + * notification in the main loop. + */ + code_addr = (vir_bytes) monitor_code; + code_size = strlen(monitor_code) + 1; + sys_abort(abort_flag, PM_PROC_NR, code_addr, code_size); + break; + } + + case PM_EXEC_REPLY: + proc_e= m_ptr->PM_EXEC_PROC; + if (pm_isokendpt(proc_e, &proc_n) != OK) + { + panic(__FILE__, + "PM_EXIT_REPLY: got bad endpoint from FS", + proc_e); + } + rmp= &mproc[proc_n]; + + /* Call is finished */ + rmp->mp_fs_call= PM_IDLE; + + exec_restart(rmp, m_ptr->PM_EXEC_STATUS); + + if (rmp->mp_flags & PM_SIG_PENDING) + { + printf("handle_fs_reply: restarting signals\n"); + rmp->mp_flags &= ~PM_SIG_PENDING; + check_pending(rmp); + if (!(rmp->mp_flags & PM_SIG_PENDING)) + { + printf("handle_fs_reply: calling sys_nice\n"); + /* Allow the process to be scheduled */ + sys_nice(rmp->mp_endpoint, rmp->mp_nice); + } + else + printf("handle_fs_reply: more signals\n"); + } + break; + + case PM_CORE_REPLY: + { + int parent_waiting, right_child; + pid_t pidarg; + struct mproc *p_mp; + + proc_e= m_ptr->PM_CORE_PROC; + if (pm_isokendpt(proc_e, &proc_n) != OK) + { + panic(__FILE__, + "PM_EXIT_REPLY: got bad endpoint from FS", + proc_e); + } + rmp= &mproc[proc_n]; + + if (m_ptr->PM_CORE_STATUS == OK) + rmp->mp_sigstatus |= DUMPED; + + /* Call is finished */ + rmp->mp_fs_call= PM_IDLE; + + p_mp = &mproc[rmp->mp_parent]; /* process' parent */ 
+ pidarg = p_mp->mp_wpid; /* who's being waited for? */ + parent_waiting = p_mp->mp_flags & WAITING; + right_child = /* child meets one of the 3 tests? */ + (pidarg == -1 || pidarg == rmp->mp_pid || + -pidarg == rmp->mp_procgrp); + + if (parent_waiting && right_child) { + tell_parent(rmp); /* tell parent */ + } else { + /* parent not waiting, zombify child */ + rmp->mp_flags &= (IN_USE|PRIV_PROC); + rmp->mp_flags |= ZOMBIE; + /* send parent a "child died" signal */ + sig_proc(p_mp, SIGCHLD); + } + + if (!(rmp->mp_flags & PRIV_PROC)) + { + /* destroy the (user) process */ + if((r=sys_exit(proc_e)) != OK) + { + panic(__FILE__, + "PM_CORE_REPLY: sys_exit failed", r); + } + } + + /* Release the memory occupied by the child. */ + if (find_share(rmp, rmp->mp_ino, rmp->mp_dev, + rmp->mp_ctime) == NULL) { + /* No other process shares the text segment, + * so free it. + */ + free_mem(rmp->mp_seg[T].mem_phys, + rmp->mp_seg[T].mem_len); + } + /* Free the data and stack segments. */ + free_mem(rmp->mp_seg[D].mem_phys, rmp->mp_seg[S].mem_vir + + rmp->mp_seg[S].mem_len - rmp->mp_seg[D].mem_vir); + + /* Clean up if the parent has collected the exit + * status + */ + if (!(rmp->mp_flags & ZOMBIE)) + real_cleanup(rmp); + + break; + } + default: + panic(__FILE__, "handle_fs_reply: unknown reply type", + m_ptr->m_type); + break; + } + +} + diff --git a/servers/pm/misc.c b/servers/pm/misc.c index f5736110a..cc3569d8e 100644 --- a/servers/pm/misc.c +++ b/servers/pm/misc.c @@ -179,10 +179,7 @@ PUBLIC int do_getprocnr() *===========================================================================*/ PUBLIC int do_reboot() { - char monitor_code[256]; - vir_bytes code_addr; - int code_size; - int abort_flag; + int r; /* Check permission to abort the system. 
*/ if (mp->mp_effuid != SUPER_USER) return(EPERM); @@ -197,10 +194,10 @@ PUBLIC int do_reboot() if((r = sys_datacopy(who_e, (vir_bytes) m_in.reboot_code, SELF, (vir_bytes) monitor_code, m_in.reboot_strlen)) != OK) return r; - code_addr = (vir_bytes) monitor_code; monitor_code[m_in.reboot_strlen] = '\0'; - code_size = m_in.reboot_strlen + 1; } + else + monitor_code[0] = '\0'; /* Order matters here. When FS is told to reboot, it exits all its * processes, and then would be confused if they're exited again by @@ -209,12 +206,11 @@ PUBLIC int do_reboot() check_sig(-1, SIGKILL); /* kill all users except init */ sys_nice(INIT_PROC_NR, PRIO_STOP); /* stop init, but keep it around */ - tell_fs(REBOOT, 0, 0, 0); /* tell FS to synchronize */ - /* Ask the kernel to abort. All system services, including the PM, will - * get a HARD_STOP notification. Await the notification in the main loop. - */ - sys_abort(abort_flag, PM_PROC_NR, code_addr, code_size); + report_reboot= 1; + r= notify(FS_PROC_NR); + if (r != OK) panic("pm", "do_reboot: unable to notify FS", r); + return(SUSPEND); /* don't reply to caller */ } @@ -386,44 +382,3 @@ PUBLIC int do_svrctl() return(EINVAL); } } - -/*===========================================================================* - * _read_pm * - *===========================================================================*/ -PUBLIC ssize_t _read_pm(fd, buffer, nbytes, seg, ep) -int fd; -void *buffer; -size_t nbytes; -int seg; -int ep; -{ - message m; - - m.m1_i1 = _PM_SEG_FLAG | fd; - m.m1_i2 = nbytes; - m.m1_p1 = (char *) buffer; - m.m1_p2 = (char *) seg; - m.m1_p3 = (char *) ep; - return(_syscall(FS_PROC_NR, READ, &m)); -} - -/*===========================================================================* - * _write_pm * - *===========================================================================*/ -PUBLIC ssize_t _write_pm(fd, buffer, nbytes, seg, ep) -int fd; -void *buffer; -size_t nbytes; -int seg; -int ep; -{ - message m; - - m.m1_i1 = _PM_SEG_FLAG 
| fd; - m.m1_i2 = nbytes; - m.m1_p1 = (char *) buffer; - m.m1_p2 = (char *) seg; - m.m1_p3 = (char *) ep; - return(_syscall(FS_PROC_NR, WRITE, &m)); -} - diff --git a/servers/pm/mproc.h b/servers/pm/mproc.h index eba3fef8a..1c9543d92 100644 --- a/servers/pm/mproc.h +++ b/servers/pm/mproc.h @@ -40,6 +40,9 @@ EXTERN struct mproc { sigset_t mp_sigpending; /* pending signals to be handled */ struct sigaction mp_sigact[_NSIG + 1]; /* as in sigaction(2) */ vir_bytes mp_sigreturn; /* address of C library __sigreturn function */ + struct sigmsg mp_sigmsg; /* Save the details of the signal until the + * PM_UNPAUSE request is delivered. + */ struct timer mp_timer; /* watchdog timer for alarm(2) */ /* Backwards compatibility for signals. */ @@ -50,6 +53,13 @@ EXTERN struct mproc { struct mproc *mp_swapq; /* queue of procs waiting to be swapped in */ message mp_reply; /* reply message to be sent to one */ + /* Communication with FS */ + int mp_fs_call; + char *mp_exec_path; /* Path of executable */ + vir_bytes mp_exec_path_len; /* Length of path (including nul) */ + char *mp_exec_frame; /* Arguments */ + vir_bytes mp_exec_frame_len; /* Length of arguments */ + /* Scheduling priority. */ signed int mp_nice; /* nice is PRIO_MIN..PRIO_MAX, standard 0. 
*/ @@ -71,6 +81,8 @@ EXTERN struct mproc { #define SWAPIN 0x800 /* set if on the "swap this in" queue */ #define DONT_SWAP 0x1000 /* never swap out this process */ #define PRIV_PROC 0x2000 /* system process, special privileges */ +#define PM_SIG_PENDING 0x4000 /* process got a signal while waiting for FS */ +#define PARTIAL_EXEC 0x8000 /* Process got a new map but no content */ #define NIL_MPROC ((struct mproc *) 0) diff --git a/servers/pm/proto.h b/servers/pm/proto.h index 7a93dbf41..1b75cfd7a 100644 --- a/servers/pm/proto.h +++ b/servers/pm/proto.h @@ -38,16 +38,21 @@ _PROTOTYPE( int do_fkey_pressed, (void) ); /* exec.c */ _PROTOTYPE( int do_exec, (void) ); -_PROTOTYPE( void rw_seg, (int rw, int fd, int proc, int seg, - phys_bytes seg_bytes) ); +_PROTOTYPE( int exec_newmem, (void) ); +_PROTOTYPE( int do_execrestart, (void) ); +_PROTOTYPE( void exec_restart, (struct mproc *rmp, int result) ); _PROTOTYPE( struct mproc *find_share, (struct mproc *mp_ign, Ino_t ino, Dev_t dev, time_t ctime) ); /* forkexit.c */ _PROTOTYPE( int do_fork, (void) ); +_PROTOTYPE( int do_fork_nb, (void) ); _PROTOTYPE( int do_pm_exit, (void) ); _PROTOTYPE( int do_waitpid, (void) ); -_PROTOTYPE( void pm_exit, (struct mproc *rmp, int exit_status) ); +_PROTOTYPE( void pm_exit, (struct mproc *rmp, int exit_status, + int for_trace) ); +_PROTOTYPE (void tell_parent, (struct mproc *child) ); +_PROTOTYPE( void real_cleanup, (struct mproc *rmp) ); /* getset.c */ _PROTOTYPE( int do_getset, (void) ); @@ -63,9 +68,7 @@ _PROTOTYPE( int do_getprocnr, (void) ); _PROTOTYPE( int do_svrctl, (void) ); _PROTOTYPE( int do_allocmem, (void) ); _PROTOTYPE( int do_freemem, (void) ); -_PROTOTYPE( int do_getsetpriority, (void) ); -_PROTOTYPE( ssize_t _read_pm, (int _fd, void *_buf, size_t _n, int s, int e)); -_PROTOTYPE( ssize_t _write_pm, (int _fd, void *_buf, size_t _n, int s, int e)); +_PROTOTYPE( int do_getsetpriority, (void) ); #if (MACHINE == MACINTOSH) @@ -107,10 +110,8 @@ _PROTOTYPE( void stop_proc, (struct 
mproc *rmp, int sig_nr) ); /* utility.c */ _PROTOTYPE( pid_t get_free_pid, (void) ); -_PROTOTYPE( int allowed, (char *name_buf, struct stat *s_buf, int mask) ); _PROTOTYPE( int no_sys, (void) ); _PROTOTYPE( void panic, (char *who, char *mess, int num) ); -_PROTOTYPE( void tell_fs, (int what, int p1, int p2, int p3) ); _PROTOTYPE( int get_stack_ptr, (int proc_nr, vir_bytes *sp) ); _PROTOTYPE( int get_mem_map, (int proc_nr, struct mem_map *mem_map) ); _PROTOTYPE( char *find_param, (const char *key)); diff --git a/servers/pm/signal.c b/servers/pm/signal.c index afd1c5cd1..c3ae020de 100644 --- a/servers/pm/signal.c +++ b/servers/pm/signal.c @@ -27,16 +27,14 @@ #include #include #include +#include #include #include #include "mproc.h" #include "param.h" -#define CORE_MODE 0777 /* mode to use on core image files */ -#define DUMPED 0200 /* bit set in status when core dumped */ - -FORWARD _PROTOTYPE( void dump_core, (struct mproc *rmp) ); -FORWARD _PROTOTYPE( void unpause, (int pro) ); +FORWARD _PROTOTYPE( int dump_core, (struct mproc *rmp) ); +FORWARD _PROTOTYPE( void unpause, (int pro, int for_trace) ); FORWARD _PROTOTYPE( void handle_ksig, (int proc_nr, sigset_t sig_map) ); FORWARD _PROTOTYPE( void cause_sigalrm, (struct timer *tp) ); @@ -233,8 +231,10 @@ PUBLIC int ksig_pending() * has been handled ... */ if ((mproc[proc_nr_p].mp_flags & (IN_USE | ZOMBIE)) == IN_USE) + { if((r=sys_endksig(proc_nr_e)) != OK) /* ... 
tell kernel it's done */ panic(__FILE__,"sys_endksig failed", r); + } } } return(SUSPEND); /* prevents sending reply */ @@ -274,10 +274,6 @@ sigset_t sig_map; case SIGQUIT: case SIGWINCH: id = 0; break; /* broadcast to process group */ -#if 0 - case SIGKILL: - id = -1; break; /* broadcast to all except INIT */ -#endif default: id = proc_id; break; @@ -417,7 +413,6 @@ int signo; /* signal to send to process (1 to _NSIG) */ int s; int slot; int sigflags; - struct sigmsg sm; slot = (int) (rmp - mproc); if ((rmp->mp_flags & (IN_USE | ZOMBIE)) != IN_USE) { @@ -425,9 +420,18 @@ int signo; /* signal to send to process (1 to _NSIG) */ signo, (rmp->mp_flags & ZOMBIE) ? "zombie" : "dead", slot); panic(__FILE__,"", NO_NUM); } + if (rmp->mp_fs_call != PM_IDLE) + { + sigaddset(&rmp->mp_sigpending, signo); + rmp->mp_flags |= PM_SIG_PENDING; + /* keep the process from running */ + sys_nice(rmp->mp_endpoint, PRIO_STOP); + return; + + } if ((rmp->mp_flags & TRACED) && signo != SIGKILL) { /* A traced process has special handling. 
*/ - unpause(slot); + unpause(slot, TRUE /*for_trace*/); stop_proc(rmp, signo); /* a signal causes it to stop */ return; } @@ -451,15 +455,16 @@ int signo; /* signal to send to process (1 to _NSIG) */ sigflags = rmp->mp_sigact[signo].sa_flags; if (sigismember(&rmp->mp_catch, signo)) { if (rmp->mp_flags & SIGSUSPENDED) - sm.sm_mask = rmp->mp_sigmask2; + rmp->mp_sigmsg.sm_mask = rmp->mp_sigmask2; else - sm.sm_mask = rmp->mp_sigmask; - sm.sm_signo = signo; - sm.sm_sighandler = (vir_bytes) rmp->mp_sigact[signo].sa_handler; - sm.sm_sigreturn = rmp->mp_sigreturn; + rmp->mp_sigmsg.sm_mask = rmp->mp_sigmask; + rmp->mp_sigmsg.sm_signo = signo; + rmp->mp_sigmsg.sm_sighandler = + (vir_bytes) rmp->mp_sigact[signo].sa_handler; + rmp->mp_sigmsg.sm_sigreturn = rmp->mp_sigreturn; if ((s=get_stack_ptr(rmp->mp_endpoint, &new_sp)) != OK) panic(__FILE__,"couldn't get new stack pointer (for sig)",s); - sm.sm_stkptr = new_sp; + rmp->mp_sigmsg.sm_stkptr = new_sp; /* Make room for the sigcontext and sigframe struct. */ new_sp -= sizeof(struct sigcontext) @@ -478,17 +483,29 @@ int signo; /* signal to send to process (1 to _NSIG) */ sigdelset(&rmp->mp_catch, signo); rmp->mp_sigact[signo].sa_handler = SIG_DFL; } + sigdelset(&rmp->mp_sigpending, signo); - if (OK == (s=sys_sigsend(rmp->mp_endpoint, &sm))) { + /* Check to see if process is hanging on a PAUSE, WAIT or SIGSUSPEND + * call. + */ + if (rmp->mp_flags & (PAUSED | WAITING | SIGSUSPENDED)) { + rmp->mp_flags &= ~(PAUSED | WAITING | SIGSUSPENDED); + setreply(slot, EINTR); - sigdelset(&rmp->mp_sigpending, signo); - /* If process is hanging on PAUSE, WAIT, SIGSUSPEND, tty, - * pipe, etc., release it. - */ - unpause(slot); + /* Ask the kernel to deliver the signal */ + s= sys_sigsend(rmp->mp_endpoint, &rmp->mp_sigmsg); + if (s != OK) + panic(__FILE__, "sys_sigsend failed", s); + + /* Done */ return; } - panic(__FILE__, "sys_sigsend failed", s); + + /* Ask FS to unpause the process. Deliver the signal when FS is + * ready. 
+ */ + unpause(slot, FALSE /*!for_trace*/); + return; } else if (sigismember(&rmp->mp_sig2mess, signo)) { @@ -512,11 +529,14 @@ doterminate: return; } #endif - /* Switch to the user's FS environment and dump core. */ - tell_fs(CHDIR, rmp->mp_endpoint, FALSE, 0); - dump_core(rmp); + + s= dump_core(rmp); + if (s == SUSPEND) + return; + + /* Not dumping core, just call exit */ } - pm_exit(rmp, 0); /* terminate process */ + pm_exit(rmp, 0, FALSE /*!for_trace*/); /* terminate process */ } /*===========================================================================* @@ -618,8 +638,9 @@ register struct mproc *rmp; /*===========================================================================* * unpause * *===========================================================================*/ -PRIVATE void unpause(pro) +PRIVATE void unpause(pro, for_trace) int pro; /* which process number */ +int for_trace; /* for tracing */ { /* A signal is to be sent to a process. If that process is hanging on a * system call, the system call must be terminated with EINTR. Possible @@ -627,8 +648,8 @@ int pro; /* which process number */ * First check if the process is hanging on an PM call. If not, tell FS, * so it can check for READs and WRITEs from pipes, ttys and the like. */ - register struct mproc *rmp; + int r; rmp = &mproc[pro]; @@ -640,30 +661,29 @@ int pro; /* which process number */ } /* Process is not hanging on an PM call. Ask FS to take a look. */ - tell_fs(UNPAUSE, rmp->mp_endpoint, 0, 0); + if (rmp->mp_fs_call != PM_IDLE) + panic("pm", "unpause: not idle", rmp->mp_fs_call); + rmp->mp_fs_call= (for_trace ? 
PM_UNPAUSE_TR : PM_UNPAUSE); + r= notify(FS_PROC_NR); + if (r != OK) panic("pm", "unpause: unable to notify FS", r); } /*===========================================================================* * dump_core * *===========================================================================*/ -PRIVATE void dump_core(rmp) +PRIVATE int dump_core(rmp) register struct mproc *rmp; /* whose core is to be dumped */ { /* Make a core dump on the file "core", if possible. */ - int s, fd, seg, slot; + int r, proc_nr, proc_nr_e, parent_waiting; + pid_t procgrp; vir_bytes current_sp; - long trace_data, trace_off; + struct mproc *p_mp; + clock_t t[5]; - slot = (int) (rmp - mproc); - - /* Can core file be written? We are operating in the user's FS environment, - * so no special permission checks are needed. - */ - if (rmp->mp_realuid != rmp->mp_effuid) return; - if ( (fd = open(core_name, O_WRONLY | O_CREAT | O_TRUNC | O_NONBLOCK, - CORE_MODE)) < 0) return; - rmp->mp_sigstatus |= DUMPED; + /* Do not create core files for set uid execution */ + if (rmp->mp_realuid != rmp->mp_effuid) return OK; /* Make sure the stack segment is up to date. * We don't want adjust() to fail unless current_sp is preposterous, @@ -671,33 +691,91 @@ register struct mproc *rmp; /* whose core is to be dumped */ * the adjust() for sending a signal to fail due to safety checking. * Maybe make SAFETY_BYTES a parameter. */ - if ((s=get_stack_ptr(rmp->mp_endpoint, ¤t_sp)) != OK) - panic(__FILE__,"couldn't get new stack pointer (for core)",s); + if ((r= get_stack_ptr(rmp->mp_endpoint, ¤t_sp)) != OK) + panic(__FILE__,"couldn't get new stack pointer (for core)", r); adjust(rmp, rmp->mp_seg[D].mem_len, current_sp); - /* Write the memory map of all segments to begin the core file. */ - if (write(fd, (char *) rmp->mp_seg, (unsigned) sizeof rmp->mp_seg) - != (unsigned) sizeof rmp->mp_seg) { - close(fd); + /* Tell FS about the exiting process. 
*/ + if (rmp->mp_fs_call != PM_IDLE) + panic(__FILE__, "dump_core: not idle", rmp->mp_fs_call); + rmp->mp_fs_call= PM_DUMPCORE; + r= notify(FS_PROC_NR); + if (r != OK) panic(__FILE__, "dump_core: unable to notify FS", r); + + /* Also perform most of the normal exit processing. Informing the parent + * has to wait until we know whether the coredump was successful or not. + */ + + proc_nr = (int) (rmp - mproc); /* get process slot number */ + proc_nr_e = rmp->mp_endpoint; + + /* Remember a session leader's process group. */ + procgrp = (rmp->mp_pid == mp->mp_procgrp) ? mp->mp_procgrp : 0; + + /* If the exited process has a timer pending, kill it. */ + if (rmp->mp_flags & ALARM_ON) set_alarm(proc_nr_e, (unsigned) 0); + + /* Do accounting: fetch usage times and accumulate at parent. */ + if((r=sys_times(proc_nr_e, t)) != OK) + panic(__FILE__,"pm_exit: sys_times failed", r); + + p_mp = &mproc[rmp->mp_parent]; /* process' parent */ + p_mp->mp_child_utime += t[0] + rmp->mp_child_utime; /* add user time */ + p_mp->mp_child_stime += t[1] + rmp->mp_child_stime; /* add system time */ + + /* Tell the kernel the process is no longer runnable to prevent it from + * being scheduled in between the following steps. Then tell FS that it + * the process has exited and finally, clean up the process at the kernel. + * This order is important so that FS can tell drivers to cancel requests + * such as copying to/ from the exiting process, before it is gone. + */ + sys_nice(proc_nr_e, PRIO_STOP); /* stop the process */ + + if(proc_nr_e != FS_PROC_NR) /* if it is not FS that is exiting.. */ + { + if (rmp->mp_flags & PRIV_PROC) + { + /* destroy system processes without waiting for FS */ + if((r= sys_exit(rmp->mp_endpoint)) != OK) + panic(__FILE__, "pm_exit: sys_exit failed", r); + + /* Just send a SIGCHLD. Dealing with waidpid is too complicated + * here. 
+ */ + p_mp = &mproc[rmp->mp_parent]; /* process' parent */ + sig_proc(p_mp, SIGCHLD); + + /* Zombify to avoid calling sys_endksig */ + rmp->mp_flags |= ZOMBIE; + } + } + else + { + printf("PM: FS died\n"); return; } - /* Write out the whole kernel process table entry to get the regs. */ - trace_off = 0; - while (sys_trace(T_GETUSER, rmp->mp_endpoint, trace_off, &trace_data) == OK) { - if (write(fd, (char *) &trace_data, (unsigned) sizeof (long)) - != (unsigned) sizeof (long)) { - close(fd); - return; + /* Pending reply messages for the dead process cannot be delivered. */ + rmp->mp_flags &= ~REPLY; + + /* Keep the process around until FS is finished with it. */ + + /* If the process has children, disinherit them. INIT is the new parent. */ + for (rmp = &mproc[0]; rmp < &mproc[NR_PROCS]; rmp++) { + if (rmp->mp_flags & IN_USE && rmp->mp_parent == proc_nr) { + /* 'rmp' now points to a child to be disinherited. */ + rmp->mp_parent = INIT_PROC_NR; + parent_waiting = mproc[INIT_PROC_NR].mp_flags & WAITING; + if (parent_waiting && (rmp->mp_flags & ZOMBIE)) + { + tell_parent(rmp); + real_cleanup(rmp); + } } - trace_off += sizeof (long); } - /* Loop through segments and write the segments themselves out. */ - for (seg = 0; seg < NR_LOCAL_SEGS; seg++) { - rw_seg(1, fd, rmp->mp_endpoint, seg, - (phys_bytes) rmp->mp_seg[seg].mem_len << CLICK_SHIFT); - } - close(fd); -} + /* Send a hangup to the process' process group if it was a session leader. 
*/ + if (procgrp != 0) check_sig(-procgrp, SIGHUP); + return SUSPEND; +} diff --git a/servers/pm/table.c b/servers/pm/table.c index d1e8a5b8b..2aa6eedbc 100644 --- a/servers/pm/table.c +++ b/servers/pm/table.c @@ -81,11 +81,11 @@ _PROTOTYPE (int (*call_vec[NCALLS]), (void) ) = { no_sys, /* 64 = unused */ no_sys, /* 65 = UNPAUSE */ - no_sys, /* 66 = unused */ + exec_newmem, /* 66 = EXEC_NEWMEM */ no_sys, /* 67 = REVIVE */ no_sys, /* 68 = TASK_REPLY */ - no_sys, /* 69 = unused */ - no_sys, /* 70 = unused */ + do_fork_nb, /* 69 = FORK_NB */ + do_execrestart, /* 70 = EXEC_RESTART */ do_sigaction, /* 71 = sigaction */ do_sigsuspend, /* 72 = sigsuspend */ do_sigpending, /* 73 = sigpending */ diff --git a/servers/pm/time.c b/servers/pm/time.c index f05f1819f..b5b519177 100644 --- a/servers/pm/time.c +++ b/servers/pm/time.c @@ -13,8 +13,6 @@ #include "mproc.h" #include "param.h" -PRIVATE time_t boottime; - /*===========================================================================* * do_time * *===========================================================================*/ @@ -55,10 +53,14 @@ PUBLIC int do_stime() panic(__FILE__,"do_stime couldn't get uptime", s); boottime = (long) m_in.stime - (uptime/HZ); - /* Also inform FS about the new system time. 
*/ - tell_fs(STIME, boottime, 0, 0); + if (mp->mp_fs_call != PM_IDLE) + panic("pm", "do_stime: not idle", mp->mp_fs_call); + mp->mp_fs_call= PM_STIME; + s= notify(FS_PROC_NR); + if (s != OK) panic("pm", "do_stime: unable to notify FS", s); - return(OK); + /* Do not reply until FS is ready to process the stime request */ + return(SUSPEND); } /*===========================================================================* diff --git a/servers/pm/trace.c b/servers/pm/trace.c index 3fff5de30..600a1404a 100644 --- a/servers/pm/trace.c +++ b/servers/pm/trace.c @@ -55,9 +55,11 @@ PUBLIC int do_trace() */ switch (m_in.request) { case T_EXIT: /* exit */ - pm_exit(child, (int) m_in.data); - mp->mp_reply.reply_trace = 0; - return(OK); + pm_exit(child, (int) m_in.data, TRUE /*for_trace*/); + /* Do not reply to the caller until FS has processed the exit + * request. + */ + return SUSPEND; case T_RESUME: case T_STEP: /* resume execution */ if (m_in.data < 0 || m_in.data > _NSIG) return(EIO); diff --git a/servers/pm/utility.c b/servers/pm/utility.c index 3e03f861e..0ed48be35 100644 --- a/servers/pm/utility.c +++ b/servers/pm/utility.c @@ -6,7 +6,6 @@ * allowed: see if an access is permitted * no_sys: called for invalid system call numbers * panic: PM has run aground of a fatal error - * tell_fs: interface to FS * get_mem_map: get memory map of given process * get_stack_ptr: get stack pointer of given process * proc_from_pid: return process pointer from pid number @@ -52,44 +51,6 @@ PUBLIC pid_t get_free_pid() return(next_pid); } -/*===========================================================================* - * allowed * - *===========================================================================*/ -PUBLIC int allowed(name_buf, s_buf, mask) -char *name_buf; /* pointer to file name to be EXECed */ -struct stat *s_buf; /* buffer for doing and returning stat struct*/ -int mask; /* R_BIT, W_BIT, or X_BIT */ -{ -/* Check to see if file can be accessed. 
Return EACCES or ENOENT if the access - * is prohibited. If it is legal open the file and return a file descriptor. - */ - int fd; - int save_errno; - - /* Use the fact that mask for access() is the same as the permissions mask. - * E.g., X_BIT in <minix/const.h> is the same as X_OK in <unistd.h> and - * S_IXOTH in <sys/stat.h>. tell_fs(DO_CHDIR, ...) has set PM's real ids - * to the user's effective ids, so access() works right for setuid programs. - */ - if (access(name_buf, mask) < 0) return(-errno); - - /* The file is accessible but might not be readable. Make it readable. */ - tell_fs(SETUID, PM_PROC_NR, (int) SUPER_USER, (int) SUPER_USER); - - /* Open the file and fstat it. Restore the ids early to handle errors. */ - fd = open(name_buf, O_RDONLY | O_NONBLOCK); - save_errno = errno; /* open might fail, e.g. from ENFILE */ - tell_fs(SETUID, PM_PROC_NR, (int) mp->mp_effuid, (int) mp->mp_effuid); - if (fd < 0) return(-save_errno); - if (fstat(fd, s_buf) < 0) panic(__FILE__,"allowed: fstat failed", NO_NUM); - - /* Only regular files can be executed. */ - if (mask == X_BIT && (s_buf->st_mode & I_TYPE) != I_REGULAR) { - close(fd); - return(EACCES); - } - return(fd); -} /*===========================================================================* * no_sys * @@ -117,7 +78,6 @@ int num; /* number to go with it */ int s; /* Switch to primary console and print panic message. 
*/ - check_sig(mproc[TTY_PROC_NR].mp_pid, SIGTERM); printf("PM panic (%s): %s", who, mess); if (num != NO_NUM) printf(": %d",num); printf("\n"); @@ -126,34 +86,6 @@ int num; /* number to go with it */ sys_exit(SELF); } -/*===========================================================================* - * tell_fs * - *===========================================================================*/ -PUBLIC void tell_fs(what, p1, p2, p3) -int what, p1, p2, p3; -{ -/* This routine is only used by PM to inform FS of certain events: - * tell_fs(CHDIR, slot, dir, 0) - * tell_fs(EXEC, proc, 0, 0) - * tell_fs(EXIT, proc, 0, 0) - * tell_fs(FORK, parent, child, pid) - * tell_fs(SETGID, proc, realgid, effgid) - * tell_fs(SETSID, proc, 0, 0) - * tell_fs(SETUID, proc, realuid, effuid) - * tell_fs(UNPAUSE, proc, signr, 0) - * tell_fs(STIME, time, 0, 0) - * Ignore this call if the FS is already dead, e.g. on shutdown. - */ - message m; - - if ((mproc[FS_PROC_NR].mp_flags & (IN_USE|ZOMBIE)) != IN_USE) - return; - - m.tell_fs_arg1 = p1; - m.tell_fs_arg2 = p2; - m.tell_fs_arg3 = p3; - _taskcall(FS_PROC_NR, what, &m); -} /*===========================================================================* * find_param *