From: Ben Gras Date: Fri, 20 Jan 2006 14:45:27 +0000 (+0000) Subject: Port of freebsd pax. X-Git-Tag: v3.1.2a~419 X-Git-Url: http://zhaoyanbai.com/repos/named-checkzone.html?a=commitdiff_plain;h=bea0bcc2ebf960ab00dbf0a0ac7c50a0c1e13418;p=minix.git Port of freebsd pax. --- diff --git a/commands/Makefile b/commands/Makefile index 2f1c37768..68fd8df94 100755 --- a/commands/Makefile +++ b/commands/Makefile @@ -14,7 +14,7 @@ COREUTILS=gnu-coreutils-5.2.1 VIM=vim63 PERL=perl-5.8.7 -SMALLPROGRAMS=`arch` aal advent ash autil awk bc byacc cawf cron de dhcpd dis88 elle elvis ftp ftpd ftpd200 httpd ibm indent m4 make mdb mined patch ps reboot rlogind scripts sh simple talk talkd telnet telnetd urlget yap zmodem +SMALLPROGRAMS=`arch` aal advent ash autil awk bc byacc cawf cron de dhcpd dis88 elle elvis ftp ftpd ftpd200 httpd ibm indent m4 make mdb mined patch ps reboot rlogind scripts sh simple talk talkd telnet telnetd urlget yap zmodem pax usage: @echo "Usage: make all # Compile all commands" >&2 diff --git a/commands/pax/Makefile b/commands/pax/Makefile new file mode 100644 index 000000000..8a6fb0a54 --- /dev/null +++ b/commands/pax/Makefile @@ -0,0 +1,23 @@ +# Makefile for pax + +CC = exec cc +CFLAGS = -O -D_POSIX_SOURCE -DNET2_STAT=1 -D_MINIX=1 +LDFLAGS= -i + +all: pax + +OBJ = ar_io.o ar_subs.o buf_subs.o cache.o cpio.o file_subs.o ftree.o \ +gen_subs.o getoldopt.o options.o pat_rep.o pax.o sel_subs.o \ +tables.o tar.o tty_subs.o fgetln.o + +pax: $(OBJ) + $(CC) $(LDFLAGS) -o $@ $(OBJ) + install -S 256k $@ + +install: /usr/bin/pax + +/usr/bin/pax: pax + install -cs -o bin pax $@ + +clean: + rm -f *.o *.bak core pax diff --git a/commands/pax/ar_io.c b/commands/pax/ar_io.c new file mode 100644 index 000000000..10d480cfc --- /dev/null +++ b/commands/pax/ar_io.c @@ -0,0 +1,1298 @@ +/*- + * Copyright (c) 1992 Keith Muller. + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Keith Muller of the University of California, San Diego. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef lint +#if 0 +static char sccsid[] = "@(#)ar_io.c 8.2 (Berkeley) 4/18/94"; +#endif +#endif /* not lint */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "pax.h" +#include "options.h" +#include "extern.h" + +/* + * Routines which deal directly with the archive I/O device/file. + */ + +#define DMOD 0666 /* default mode of created archives */ +#define EXT_MODE O_RDONLY /* open mode for list/extract */ +#define AR_MODE (O_WRONLY | O_CREAT | O_TRUNC) /* mode for archive */ +#define APP_MODE O_RDWR /* mode for append */ + +static char none[] = ""; /* pseudo name for no file */ +static char stdo[] = ""; /* pseudo name for stdout */ +static char stdn[] = ""; /* pseudo name for stdin */ +static int arfd = -1; /* archive file descriptor */ +static int artyp = ISREG; /* archive type: file/FIFO/tape */ +static int arvol = 1; /* archive volume number */ +static int lstrval = -1; /* return value from last i/o */ +static int io_ok; /* i/o worked on volume after resync */ +static int did_io; /* did i/o ever occur on volume? */ +static int done; /* set via tty termination */ +static struct stat arsb; /* stat of archive device at open */ +static int invld_rec; /* tape has out of spec record size */ +static int wr_trail = 1; /* trailer was rewritten in append */ +static int can_unlnk = 0; /* do we unlink null archives? */ +const char *arcname; /* printable name of archive */ +const char *gzip_program; /* name of gzip program */ +static pid_t zpid = -1; /* pid of child process */ + +static int get_phys(void); +extern sigset_t s_mask; +static void ar_start_gzip(int, const char *, int); + +/* + * ar_open() + * Opens the next archive volume. Determines the type of the device and + * sets up block sizes as required by the archive device and the format. + * Note: we may be called with name == NULL on the first open only. + * Return: + * -1 on failure, 0 otherwise + */ + +int +ar_open(const char *name) +{ + struct mtget mb; + + if (arfd != -1) + (void)close(arfd); + arfd = -1; + can_unlnk = did_io = io_ok = invld_rec = 0; + artyp = ISREG; + flcnt = 0; + + /* + * open based on overall operation mode + */ + switch (act) { + case LIST: + case EXTRACT: + if (name == NULL) { + arfd = STDIN_FILENO; + arcname = stdn; + } else if ((arfd = open(name, EXT_MODE, DMOD)) < 0) + syswarn(0, errno, "Failed open to read on %s", name); + if (arfd != -1 && gzip_program != NULL) + ar_start_gzip(arfd, gzip_program, 0); + break; + case ARCHIVE: + if (name == NULL) { + arfd = STDOUT_FILENO; + arcname = stdo; + } else if ((arfd = open(name, AR_MODE, DMOD)) < 0) + syswarn(0, errno, "Failed open to write on %s", name); + else + can_unlnk = 1; + if (arfd != -1 && gzip_program != NULL) + ar_start_gzip(arfd, gzip_program, 1); + break; + case APPND: + if (name == NULL) { + arfd = STDOUT_FILENO; + arcname = stdo; + } else if ((arfd = open(name, APP_MODE, DMOD)) < 0) + syswarn(0, errno, "Failed open to read/write on %s", + name); + break; + case COPY: + /* + * arfd not used in COPY mode + */ + arcname = none; + lstrval = 1; + return(0); + } + if (arfd < 0) + return(-1); + + if (chdname != NULL) + if (chdir(chdname) != 0) { + syswarn(1, errno, "Failed chdir to %s", chdname); + return(-1); + } + /* + * set up is based on device type + */ + if (fstat(arfd, &arsb) < 0) { + syswarn(0, errno, "Failed stat on %s", arcname); + (void)close(arfd); + arfd = -1; + can_unlnk = 0; + return(-1); + } + if (S_ISDIR(arsb.st_mode)) { + paxwarn(0, "Cannot write an archive on top of a directory %s", + arcname); + (void)close(arfd); + arfd = -1; + can_unlnk = 0; + return(-1); + } + + if (S_ISCHR(arsb.st_mode)) + artyp = ioctl(arfd, MTIOCGET, &mb) ? ISCHR : ISTAPE; + else if (S_ISBLK(arsb.st_mode)) + artyp = ISBLK; + else if ((lseek(arfd, (off_t)0L, SEEK_CUR) == -1) && (errno == ESPIPE)) + artyp = ISPIPE; + else + artyp = ISREG; + + /* + * make sure we beyond any doubt that we only can unlink regular files + * we created + */ + if (artyp != ISREG) + can_unlnk = 0; + /* + * if we are writing, we are done + */ + if (act == ARCHIVE) { + blksz = rdblksz = wrblksz; + lstrval = 1; + return(0); + } + + /* + * set default blksz on read. APPNDs writes rdblksz on the last volume + * On all new archive volumes, we shift to wrblksz (if the user + * specified one, otherwize we will continue to use rdblksz). We + * must to set blocksize based on what kind of device the archive is + * stored. + */ + switch(artyp) { + case ISTAPE: + /* + * Tape drives come in at least two flavors. Those that support + * variable sized records and those that have fixed sized + * records. They must be treated differently. For tape drives + * that support variable sized records, we must make large + * reads to make sure we get the entire record, otherwise we + * will just get the first part of the record (up to size we + * asked). Tapes with fixed sized records may or may not return + * multiple records in a single read. We really do not care + * what the physical record size is UNLESS we are going to + * append. (We will need the physical block size to rewrite + * the trailer). Only when we are appending do we go to the + * effort to figure out the true PHYSICAL record size. + */ + blksz = rdblksz = MAXBLK; + break; + case ISPIPE: + case ISBLK: + case ISCHR: + /* + * Blocksize is not a major issue with these devices (but must + * be kept a multiple of 512). If the user specified a write + * block size, we use that to read. Under append, we must + * always keep blksz == rdblksz. Otherwise we go ahead and use + * the device optimal blocksize as (and if) returned by stat + * and if it is within pax specs. + */ + if ((act == APPND) && wrblksz) { + blksz = rdblksz = wrblksz; + break; + } + +#if 0 /* Not on minix */ + if ((arsb.st_blksize > 0) && (arsb.st_blksize < MAXBLK) && + ((arsb.st_blksize % BLKMULT) == 0)) + rdblksz = arsb.st_blksize; + else +#endif + rdblksz = DEVBLK; + /* + * For performance go for large reads when we can without harm + */ + if ((act == APPND) || (artyp == ISCHR)) + blksz = rdblksz; + else + blksz = MAXBLK; + break; + case ISREG: + /* + * if the user specified wrblksz works, use it. Under appends + * we must always keep blksz == rdblksz + */ + if ((act == APPND) && wrblksz && ((arsb.st_size%wrblksz)==0)){ + blksz = rdblksz = wrblksz; + break; + } + /* + * See if we can find the blocking factor from the file size + */ + for (rdblksz = MAXBLK; rdblksz > 0; rdblksz -= BLKMULT) + if ((arsb.st_size % rdblksz) == 0) + break; + /* + * When we cannot find a match, we may have a flawed archive. + */ + if (rdblksz <= 0) + rdblksz = FILEBLK; + /* + * for performance go for large reads when we can + */ + if (act == APPND) + blksz = rdblksz; + else + blksz = MAXBLK; + break; + default: + /* + * should never happen, worse case, slow... + */ + blksz = rdblksz = BLKMULT; + break; + } + lstrval = 1; + return(0); +} + +/* + * ar_close() + * closes archive device, increments volume number, and prints i/o summary + */ +void +ar_close(void) +{ + int status; + + if (arfd < 0) { + did_io = io_ok = flcnt = 0; + return; + } + + /* + * Close archive file. This may take a LONG while on tapes (we may be + * forced to wait for the rewind to complete) so tell the user what is + * going on (this avoids the user hitting control-c thinking pax is + * broken). + */ + if (vflag && (artyp == ISTAPE)) { + if (vfpart) + (void)putc('\n', listf); + (void)fprintf(listf, + "%s: Waiting for tape drive close to complete...", + argv0); + (void)fflush(listf); + } + + /* + * if nothing was written to the archive (and we created it), we remove + * it + */ + if (can_unlnk && (fstat(arfd, &arsb) == 0) && (S_ISREG(arsb.st_mode)) && + (arsb.st_size == 0)) { + (void)unlink(arcname); + can_unlnk = 0; + } + + /* + * for a quick extract/list, pax frequently exits before the child + * process is done + */ + if ((act == LIST || act == EXTRACT) && nflag && zpid > 0) + kill(zpid, SIGINT); + + (void)close(arfd); + + /* Do not exit before child to ensure data integrity */ + if (zpid > 0) + waitpid(zpid, &status, 0); + + if (vflag && (artyp == ISTAPE)) { + (void)fputs("done.\n", listf); + vfpart = 0; + (void)fflush(listf); + } + arfd = -1; + + if (!io_ok && !did_io) { + flcnt = 0; + return; + } + did_io = io_ok = 0; + + /* + * The volume number is only increased when the last device has data + * and we have already determined the archive format. + */ + if (frmt != NULL) + ++arvol; + + if (!vflag) { + flcnt = 0; + return; + } + + /* + * Print out a summary of I/O for this archive volume. + */ + if (vfpart) { + (void)putc('\n', listf); + vfpart = 0; + } + + /* + * If we have not determined the format yet, we just say how many bytes + * we have skipped over looking for a header to id. there is no way we + * could have written anything yet. + */ + if (frmt == NULL) { +# ifdef NET2_STAT + (void)fprintf(listf, "%s: unknown format, %lu bytes skipped.\n", + argv0, rdcnt); +# else + (void)fprintf(listf, "%s: unknown format, %ju bytes skipped.\n", + argv0, (uintmax_t)rdcnt); +# endif + (void)fflush(listf); + flcnt = 0; + return; + } + + if (strcmp(NM_CPIO, argv0) == 0) + (void)fprintf(listf, "%llu blocks\n", + (unsigned long)((rdcnt ? rdcnt : wrcnt) / 5120)); + else if (strcmp(NM_TAR, argv0) != 0) + (void)fprintf(listf, +# ifdef NET2_STAT + "%s: %s vol %d, %lu files, %lu bytes read, %lu bytes written.\n", + argv0, frmt->name, arvol-1, flcnt, rdcnt, wrcnt); +# else + "%s: %s vol %d, %ju files, %ju bytes read, %ju bytes written.\n", + argv0, frmt->name, arvol-1, (uintmax_t)flcnt, + (uintmax_t)rdcnt, (uintmax_t)wrcnt); +# endif + (void)fflush(listf); + flcnt = 0; +} + +/* + * ar_drain() + * drain any archive format independent padding from an archive read + * from a socket or a pipe. This is to prevent the process on the + * other side of the pipe from getting a SIGPIPE (pax will stop + * reading an archive once a format dependent trailer is detected). + */ +void +ar_drain(void) +{ + int res; + char drbuf[MAXBLK]; + + /* + * we only drain from a pipe/socket. Other devices can be closed + * without reading up to end of file. We sure hope that pipe is closed + * on the other side so we will get an EOF. + */ + if ((artyp != ISPIPE) || (lstrval <= 0)) + return; + + /* + * keep reading until pipe is drained + */ + while ((res = read(arfd, drbuf, sizeof(drbuf))) > 0) + ; + lstrval = res; +} + +/* + * ar_set_wr() + * Set up device right before switching from read to write in an append. + * device dependent code (if required) to do this should be added here. + * For all archive devices we are already positioned at the place we want + * to start writing when this routine is called. + * Return: + * 0 if all ready to write, -1 otherwise + */ + +int +ar_set_wr(void) +{ + off_t cpos; + + /* + * we must make sure the trailer is rewritten on append, ar_next() + * will stop us if the archive containing the trailer was not written + */ + wr_trail = 0; + + /* + * Add any device dependent code as required here + */ + if (artyp != ISREG) + return(0); + /* + * Ok we have an archive in a regular file. If we were rewriting a + * file, we must get rid of all the stuff after the current offset + * (it was not written by pax). + */ + if (((cpos = lseek(arfd, (off_t)0L, SEEK_CUR)) < 0) || + (ftruncate(arfd, cpos) < 0)) { + syswarn(1, errno, "Unable to truncate archive file"); + return(-1); + } + return(0); +} + +/* + * ar_app_ok() + * check if the last volume in the archive allows appends. We cannot check + * this until we are ready to write since there is no spec that says all + * volumes in a single archive have to be of the same type... + * Return: + * 0 if we can append, -1 otherwise. + */ + +int +ar_app_ok(void) +{ + if (artyp == ISPIPE) { + paxwarn(1, "Cannot append to an archive obtained from a pipe."); + return(-1); + } + + if (!invld_rec) + return(0); + paxwarn(1,"Cannot append, device record size %d does not support %s spec", + rdblksz, argv0); + return(-1); +} + +/* + * ar_read() + * read up to a specified number of bytes from the archive into the + * supplied buffer. When dealing with tapes we may not always be able to + * read what we want. + * Return: + * Number of bytes in buffer. 0 for end of file, -1 for a read error. + */ + +int +ar_read(char *buf, int cnt) +{ + int res = 0; + + /* + * if last i/o was in error, no more reads until reset or new volume + */ + if (lstrval <= 0) + return(lstrval); + + /* + * how we read must be based on device type + */ + switch (artyp) { + case ISTAPE: + if ((res = read(arfd, buf, cnt)) > 0) { + /* + * CAUTION: tape systems may not always return the same + * sized records so we leave blksz == MAXBLK. The + * physical record size that a tape drive supports is + * very hard to determine in a uniform and portable + * manner. + */ + io_ok = 1; + if (res != rdblksz) { + /* + * Record size changed. If this is happens on + * any record after the first, we probably have + * a tape drive which has a fixed record size + * we are getting multiple records in a single + * read). Watch out for record blocking that + * violates pax spec (must be a multiple of + * BLKMULT). + */ + rdblksz = res; + if (rdblksz % BLKMULT) + invld_rec = 1; + } + return(res); + } + break; + case ISREG: + case ISBLK: + case ISCHR: + case ISPIPE: + default: + /* + * Files are so easy to deal with. These other things cannot + * be trusted at all. So when we are dealing with character + * devices and pipes we just take what they have ready for us + * and return. Trying to do anything else with them runs the + * risk of failure. + */ + if ((res = read(arfd, buf, cnt)) > 0) { + io_ok = 1; + return(res); + } + break; + } + + /* + * We are in trouble at this point, something is broken... + */ + lstrval = res; + if (res < 0) + syswarn(1, errno, "Failed read on archive volume %d", arvol); + else + paxwarn(0, "End of archive volume %d reached", arvol); + return(res); +} + +/* + * ar_write() + * Write a specified number of bytes in supplied buffer to the archive + * device so it appears as a single "block". Deals with errors and tries + * to recover when faced with short writes. + * Return: + * Number of bytes written. 0 indicates end of volume reached and with no + * flaws (as best that can be detected). A -1 indicates an unrecoverable + * error in the archive occured. + */ + +int +ar_write(char *buf, int bsz) +{ + int res; + off_t cpos; + + /* + * do not allow pax to create a "bad" archive. Once a write fails on + * an archive volume prevent further writes to it. + */ + if (lstrval <= 0) + return(lstrval); + + if ((res = write(arfd, buf, bsz)) == bsz) { + wr_trail = 1; + io_ok = 1; + return(bsz); + } + /* + * write broke, see what we can do with it. We try to send any partial + * writes that may violate pax spec to the next archive volume. + */ + if (res < 0) + lstrval = res; + else + lstrval = 0; + + switch (artyp) { + case ISREG: + if ((res > 0) && (res % BLKMULT)) { + /* + * try to fix up partial writes which are not BLKMULT + * in size by forcing the runt record to next archive + * volume + */ + if ((cpos = lseek(arfd, (off_t)0L, SEEK_CUR)) < 0) + break; + cpos -= (off_t)res; + if (ftruncate(arfd, cpos) < 0) + break; + res = lstrval = 0; + break; + } + if (res >= 0) + break; + /* + * if file is out of space, handle it like a return of 0 + */ + if ((errno == ENOSPC) || (errno == EFBIG) +#ifdef EDQUOT + || (errno == EDQUOT) +#endif + ) + res = lstrval = 0; + break; + case ISTAPE: + case ISCHR: + case ISBLK: + if (res >= 0) + break; + if (errno == EACCES) { + paxwarn(0, "Write failed, archive is write protected."); + res = lstrval = 0; + return(0); + } + /* + * see if we reached the end of media, if so force a change to + * the next volume + */ + if ((errno == ENOSPC) || (errno == EIO) || (errno == ENXIO)) + res = lstrval = 0; + break; + case ISPIPE: + default: + /* + * we cannot fix errors to these devices + */ + break; + } + + /* + * Better tell the user the bad news... + * if this is a block aligned archive format, we may have a bad archive + * if the format wants the header to start at a BLKMULT boundary. While + * we can deal with the mis-aligned data, it violates spec and other + * archive readers will likely fail. if the format is not block + * aligned, the user may be lucky (and the archive is ok). + */ + if (res >= 0) { + if (res > 0) + wr_trail = 1; + io_ok = 1; + } + + /* + * If we were trying to rewrite the trailer and it didn't work, we + * must quit right away. + */ + if (!wr_trail && (res <= 0)) { + paxwarn(1,"Unable to append, trailer re-write failed. Quitting."); + return(res); + } + + if (res == 0) + paxwarn(0, "End of archive volume %d reached", arvol); + else if (res < 0) + syswarn(1, errno, "Failed write to archive volume: %d", arvol); + else if (!frmt->blkalgn || ((res % frmt->blkalgn) == 0)) + paxwarn(0,"WARNING: partial archive write. Archive MAY BE FLAWED"); + else + paxwarn(1,"WARNING: partial archive write. Archive IS FLAWED"); + return(res); +} + +/* + * ar_rdsync() + * Try to move past a bad spot on a flawed archive as needed to continue + * I/O. Clears error flags to allow I/O to continue. + * Return: + * 0 when ok to try i/o again, -1 otherwise. + */ + +int +ar_rdsync(void) +{ + long fsbz; + off_t cpos; + off_t mpos; + struct mtop mb; + + /* + * Fail resync attempts at user request (done) or this is going to be + * an update/append to an existing archive. If last i/o hit media end, + * we need to go to the next volume not try a resync. + */ + if ((done > 0) || (lstrval == 0)) + return(-1); + + if ((act == APPND) || (act == ARCHIVE)) { + paxwarn(1, "Cannot allow updates to an archive with flaws."); + return(-1); + } + if (io_ok) + did_io = 1; + + switch(artyp) { + case ISTAPE: + /* + * if the last i/o was a successful data transfer, we assume + * the fault is just a bad record on the tape that we are now + * past. If we did not get any data since the last resync try + * to move the tape forward one PHYSICAL record past any + * damaged tape section. Some tape drives are stubborn and need + * to be pushed. + */ + if (io_ok) { + io_ok = 0; + lstrval = 1; + break; + } + mb.mt_op = MTFSR; + mb.mt_count = 1; + if (ioctl(arfd, MTIOCTOP, &mb) < 0) + break; + lstrval = 1; + break; + case ISREG: + case ISCHR: + case ISBLK: + /* + * try to step over the bad part of the device. + */ + io_ok = 0; +#if 0 + /* no blksize on minix */ + if (((fsbz = arsb.st_blksize) <= 0) || (artyp != ISREG)) +#endif + fsbz = BLKMULT; + if ((cpos = lseek(arfd, (off_t)0L, SEEK_CUR)) < 0) + break; + mpos = fsbz - (cpos % (off_t)fsbz); + if (lseek(arfd, mpos, SEEK_CUR) < 0) + break; + lstrval = 1; + break; + case ISPIPE: + default: + /* + * cannot recover on these archive device types + */ + io_ok = 0; + break; + } + if (lstrval <= 0) { + paxwarn(1, "Unable to recover from an archive read failure."); + return(-1); + } + paxwarn(0, "Attempting to recover from an archive read failure."); + return(0); +} + +/* + * ar_fow() + * Move the I/O position within the archive foward the specified number of + * bytes as supported by the device. If we cannot move the requested + * number of bytes, return the actual number of bytes moved in skipped. + * Return: + * 0 if moved the requested distance, -1 on complete failure, 1 on + * partial move (the amount moved is in skipped) + */ + +int +ar_fow(off_t sksz, off_t *skipped) +{ + off_t cpos; + off_t mpos; + + *skipped = 0; + if (sksz <= 0) + return(0); + + /* + * we cannot move foward at EOF or error + */ + if (lstrval <= 0) + return(lstrval); + + /* + * Safer to read forward on devices where it is hard to find the end of + * the media without reading to it. With tapes we cannot be sure of the + * number of physical blocks to skip (we do not know physical block + * size at this point), so we must only read foward on tapes! + */ + if (artyp != ISREG) + return(0); + + /* + * figure out where we are in the archive + */ + if ((cpos = lseek(arfd, (off_t)0L, SEEK_CUR)) >= 0) { + /* + * we can be asked to move farther than there are bytes in this + * volume, if so, just go to file end and let normal buf_fill() + * deal with the end of file (it will go to next volume by + * itself) + */ + if ((mpos = cpos + sksz) > arsb.st_size) { + *skipped = arsb.st_size - cpos; + mpos = arsb.st_size; + } else + *skipped = sksz; + if (lseek(arfd, mpos, SEEK_SET) >= 0) + return(0); + } + syswarn(1, errno, "Forward positioning operation on archive failed"); + lstrval = -1; + return(-1); +} + +/* + * ar_rev() + * move the i/o position within the archive backwards the specified byte + * count as supported by the device. With tapes drives we RESET rdblksz to + * the PHYSICAL blocksize. + * NOTE: We should only be called to move backwards so we can rewrite the + * last records (the trailer) of an archive (APPEND). + * Return: + * 0 if moved the requested distance, -1 on complete failure + */ + +int +ar_rev(off_t sksz) +{ + off_t cpos; + struct mtop mb; + int phyblk; + + /* + * make sure we do not have try to reverse on a flawed archive + */ + if (lstrval < 0) + return(lstrval); + + switch(artyp) { + case ISPIPE: + if (sksz <= 0) + break; + /* + * cannot go backwards on these critters + */ + paxwarn(1, "Reverse positioning on pipes is not supported."); + lstrval = -1; + return(-1); + case ISREG: + case ISBLK: + case ISCHR: + default: + if (sksz <= 0) + break; + + /* + * For things other than files, backwards movement has a very + * high probability of failure as we really do not know the + * true attributes of the device we are talking to (the device + * may not even have the ability to lseek() in any direction). + * First we figure out where we are in the archive. + */ + if ((cpos = lseek(arfd, (off_t)0L, SEEK_CUR)) < 0) { + syswarn(1, errno, + "Unable to obtain current archive byte offset"); + lstrval = -1; + return(-1); + } + + /* + * we may try to go backwards past the start when the archive + * is only a single record. If this hapens and we are on a + * multi volume archive, we need to go to the end of the + * previous volume and continue our movement backwards from + * there. + */ + if ((cpos -= sksz) < (off_t)0L) { + if (arvol > 1) { + /* + * this should never happen + */ + paxwarn(1,"Reverse position on previous volume."); + lstrval = -1; + return(-1); + } + cpos = (off_t)0L; + } + if (lseek(arfd, cpos, SEEK_SET) < 0) { + syswarn(1, errno, "Unable to seek archive backwards"); + lstrval = -1; + return(-1); + } + break; + case ISTAPE: + /* + * Calculate and move the proper number of PHYSICAL tape + * blocks. If the sksz is not an even multiple of the physical + * tape size, we cannot do the move (this should never happen). + * (We also cannot handler trailers spread over two vols). + * get_phys() also makes sure we are in front of the filemark. + */ + if ((phyblk = get_phys()) <= 0) { + lstrval = -1; + return(-1); + } + + /* + * make sure future tape reads only go by physical tape block + * size (set rdblksz to the real size). + */ + rdblksz = phyblk; + + /* + * if no movement is required, just return (we must be after + * get_phys() so the physical blocksize is properly set) + */ + if (sksz <= 0) + break; + + /* + * ok we have to move. Make sure the tape drive can do it. + */ + if (sksz % phyblk) { + paxwarn(1, + "Tape drive unable to backspace requested amount"); + lstrval = -1; + return(-1); + } + + /* + * move backwards the requested number of bytes + */ + mb.mt_op = MTBSR; + mb.mt_count = sksz/phyblk; + if (ioctl(arfd, MTIOCTOP, &mb) < 0) { + syswarn(1,errno, "Unable to backspace tape %d blocks.", + mb.mt_count); + lstrval = -1; + return(-1); + } + break; + } + lstrval = 1; + return(0); +} + +/* + * get_phys() + * Determine the physical block size on a tape drive. We need the physical + * block size so we know how many bytes we skip over when we move with + * mtio commands. We also make sure we are BEFORE THE TAPE FILEMARK when + * return. + * This is one really SLOW routine... + * Return: + * physical block size if ok (ok > 0), -1 otherwise + */ + +static int +get_phys(void) +{ + int padsz = 0; + int res; + int phyblk; + struct mtop mb; + char scbuf[MAXBLK]; + + /* + * move to the file mark, and then back up one record and read it. + * this should tell us the physical record size the tape is using. + */ + if (lstrval == 1) { + /* + * we know we are at file mark when we get back a 0 from + * read() + */ + while ((res = read(arfd, scbuf, sizeof(scbuf))) > 0) + padsz += res; + if (res < 0) { + syswarn(1, errno, "Unable to locate tape filemark."); + return(-1); + } + } + + /* + * move backwards over the file mark so we are at the end of the + * last record. + */ + mb.mt_op = MTBSF; + mb.mt_count = 1; + if (ioctl(arfd, MTIOCTOP, &mb) < 0) { + syswarn(1, errno, "Unable to backspace over tape filemark."); + return(-1); + } + + /* + * move backwards so we are in front of the last record and read it to + * get physical tape blocksize. + */ + mb.mt_op = MTBSR; + mb.mt_count = 1; + if (ioctl(arfd, MTIOCTOP, &mb) < 0) { + syswarn(1, errno, "Unable to backspace over last tape block."); + return(-1); + } + if ((phyblk = read(arfd, scbuf, sizeof(scbuf))) <= 0) { + syswarn(1, errno, "Cannot determine archive tape blocksize."); + return(-1); + } + + /* + * read foward to the file mark, then back up in front of the filemark + * (this is a bit paranoid, but should be safe to do). + */ + while ((res = read(arfd, scbuf, sizeof(scbuf))) > 0) + ; + if (res < 0) { + syswarn(1, errno, "Unable to locate tape filemark."); + return(-1); + } + mb.mt_op = MTBSF; + mb.mt_count = 1; + if (ioctl(arfd, MTIOCTOP, &mb) < 0) { + syswarn(1, errno, "Unable to backspace over tape filemark."); + return(-1); + } + + /* + * set lstrval so we know that the filemark has not been seen + */ + lstrval = 1; + + /* + * return if there was no padding + */ + if (padsz == 0) + return(phyblk); + + /* + * make sure we can move backwards over the padding. (this should + * never fail). + */ + if (padsz % phyblk) { + paxwarn(1, "Tape drive unable to backspace requested amount"); + return(-1); + } + + /* + * move backwards over the padding so the head is where it was when + * we were first called (if required). + */ + mb.mt_op = MTBSR; + mb.mt_count = padsz/phyblk; + if (ioctl(arfd, MTIOCTOP, &mb) < 0) { + syswarn(1,errno,"Unable to backspace tape over %d pad blocks", + mb.mt_count); + return(-1); + } + return(phyblk); +} + +/* + * ar_next() + * prompts the user for the next volume in this archive. For some devices + * we may allow the media to be changed. Otherwise a new archive is + * prompted for. By pax spec, if there is no controlling tty or an eof is + * read on tty input, we must quit pax. + * Return: + * 0 when ready to continue, -1 when all done + */ + +int +ar_next(void) +{ + char buf[PAXPATHLEN+2]; + static int freeit = 0; + sigset_t o_mask; + + /* + * WE MUST CLOSE THE DEVICE. A lot of devices must see last close, (so + * things like writing EOF etc will be done) (Watch out ar_close() can + * also be called via a signal handler, so we must prevent a race. + */ + if (sigprocmask(SIG_BLOCK, &s_mask, &o_mask) < 0) + syswarn(0, errno, "Unable to set signal mask"); + ar_close(); + if (sigprocmask(SIG_SETMASK, &o_mask, NULL) < 0) + syswarn(0, errno, "Unable to restore signal mask"); + + if (done || !wr_trail || strcmp(NM_TAR, argv0) == 0) + return(-1); + + tty_prnt("\nATTENTION! %s archive volume change required.\n", argv0); + + /* + * if i/o is on stdin or stdout, we cannot reopen it (we do not know + * the name), the user will be forced to type it in. + */ + if (strcmp(arcname, stdo) && strcmp(arcname, stdn) && (artyp != ISREG) + && (artyp != ISPIPE)) { + if (artyp == ISTAPE) { + tty_prnt("%s ready for archive tape volume: %d\n", + arcname, arvol); + tty_prnt("Load the NEXT TAPE on the tape drive"); + } else { + tty_prnt("%s ready for archive volume: %d\n", + arcname, arvol); + tty_prnt("Load the NEXT STORAGE MEDIA (if required)"); + } + + if ((act == ARCHIVE) || (act == APPND)) + tty_prnt(" and make sure it is WRITE ENABLED.\n"); + else + tty_prnt("\n"); + + for(;;) { + tty_prnt("Type \"y\" to continue, \".\" to quit %s,", + argv0); + tty_prnt(" or \"s\" to switch to new device.\nIf you"); + tty_prnt(" cannot change storage media, type \"s\"\n"); + tty_prnt("Is the device ready and online? > "); + + if ((tty_read(buf,sizeof(buf))<0) || !strcmp(buf,".")){ + done = 1; + lstrval = -1; + tty_prnt("Quitting %s!\n", argv0); + vfpart = 0; + return(-1); + } + + if ((buf[0] == '\0') || (buf[1] != '\0')) { + tty_prnt("%s unknown command, try again\n",buf); + continue; + } + + switch (buf[0]) { + case 'y': + case 'Y': + /* + * we are to continue with the same device + */ + if (ar_open(arcname) >= 0) + return(0); + tty_prnt("Cannot re-open %s, try again\n", + arcname); + continue; + case 's': + case 'S': + /* + * user wants to open a different device + */ + tty_prnt("Switching to a different archive\n"); + break; + default: + tty_prnt("%s unknown command, try again\n",buf); + continue; + } + break; + } + } else + tty_prnt("Ready for archive volume: %d\n", arvol); + + /* + * have to go to a different archive + */ + for (;;) { + tty_prnt("Input archive name or \".\" to quit %s.\n", argv0); + tty_prnt("Archive name > "); + + if ((tty_read(buf, sizeof(buf)) < 0) || !strcmp(buf, ".")) { + done = 1; + lstrval = -1; + tty_prnt("Quitting %s!\n", argv0); + vfpart = 0; + return(-1); + } + if (buf[0] == '\0') { + tty_prnt("Empty file name, try again\n"); + continue; + } + if (!strcmp(buf, "..")) { + tty_prnt("Illegal file name: .. try again\n"); + continue; + } + if (strlen(buf) > PAXPATHLEN) { + tty_prnt("File name too long, try again\n"); + continue; + } + + /* + * try to open new archive + */ + if (ar_open(buf) >= 0) { + if (freeit) { + (void)free((char *)(uintptr_t)arcname); + freeit = 0; + } + if ((arcname = strdup(buf)) == NULL) { + done = 1; + lstrval = -1; + paxwarn(0, "Cannot save archive name."); + return(-1); + } + freeit = 1; + break; + } + tty_prnt("Cannot open %s, try again\n", buf); + continue; + } + return(0); +} + +/* + * ar_start_gzip() + * starts the gzip compression/decompression process as a child, using magic + * to keep the fd the same in the calling function (parent). + */ +static void +ar_start_gzip(int fd, const char *gzip_prog, int wr) +{ + int fds[2]; + const char *gzip_flags; + + if (pipe(fds) < 0) + err(1, "could not pipe"); + zpid = fork(); + if (zpid < 0) + err(1, "could not fork"); + + /* parent */ + if (zpid) { + if (wr) + dup2(fds[1], fd); + else + dup2(fds[0], fd); + close(fds[0]); + close(fds[1]); + } else { + if (wr) { + dup2(fds[0], STDIN_FILENO); + dup2(fd, STDOUT_FILENO); + gzip_flags = "-c"; + } else { + dup2(fds[1], STDOUT_FILENO); + dup2(fd, STDIN_FILENO); + gzip_flags = "-dc"; + } + close(fds[0]); + close(fds[1]); + if (execlp(gzip_prog, gzip_prog, gzip_flags, + (char *)NULL) < 0) + err(1, "could not exec"); + /* NOTREACHED */ + } +} diff --git a/commands/pax/ar_subs.c b/commands/pax/ar_subs.c new file mode 100644 index 000000000..c78b2173b --- /dev/null +++ b/commands/pax/ar_subs.c @@ -0,0 +1,1237 @@ +/*- + * Copyright (c) 1992 Keith Muller. + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Keith Muller of the University of California, San Diego. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef lint +#if 0 +static char sccsid[] = "@(#)ar_subs.c 8.2 (Berkeley) 4/18/94"; +#endif +#endif /* not lint */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "pax.h" +#include "extern.h" + +static void wr_archive(ARCHD *, int is_app); +static int get_arc(void); +static int next_head(ARCHD *); +extern sigset_t s_mask; + +/* + * Routines which control the overall operation modes of pax as specified by + * the user: list, append, read ... + */ + +static char hdbuf[BLKMULT]; /* space for archive header on read */ +u_long flcnt; /* number of files processed */ + +/* + * list() + * list the contents of an archive which match user supplied pattern(s) + * (no pattern matches all). + */ + +void +list(void) +{ + ARCHD *arcn; + int res; + ARCHD archd; + time_t now; + + arcn = &archd; + /* + * figure out archive type; pass any format specific options to the + * archive option processing routine; call the format init routine. We + * also save current time for ls_list() so we do not make a system + * call for each file we need to print. If verbose (vflag) start up + * the name and group caches. + */ + if ((get_arc() < 0) || ((*frmt->options)() < 0) || + ((*frmt->st_rd)() < 0)) + return; + + if (vflag && ((uidtb_start() < 0) || (gidtb_start() < 0))) + return; + + now = time(NULL); + + /* + * step through the archive until the format says it is done + */ + while (next_head(arcn) == 0) { + /* + * check for pattern, and user specified options match. + * When all patterns are matched we are done. + */ + if ((res = pat_match(arcn)) < 0) + break; + + if ((res == 0) && (sel_chk(arcn) == 0)) { + /* + * pattern resulted in a selected file + */ + if (pat_sel(arcn) < 0) + break; + + /* + * modify the name as requested by the user if name + * survives modification, do a listing of the file + */ + if ((res = mod_name(arcn)) < 0) + break; + if (res == 0) + ls_list(arcn, now, stdout); + } + + /* + * skip to next archive format header using values calculated + * by the format header read routine + */ + if (rd_skip(arcn->skip + arcn->pad) == 1) + break; + } + + /* + * all done, let format have a chance to cleanup, and make sure that + * the patterns supplied by the user were all matched + */ + (void)(*frmt->end_rd)(); + (void)sigprocmask(SIG_BLOCK, &s_mask, NULL); + ar_close(); + pat_chk(); +} + +/* + * extract() + * extract the member(s) of an archive as specified by user supplied + * pattern(s) (no patterns extracts all members) + */ + +void +extract(void) +{ + ARCHD *arcn; + int res; + off_t cnt; + ARCHD archd; + struct stat sb; + int fd; + time_t now; + + arcn = &archd; + /* + * figure out archive type; pass any format specific options to the + * archive option processing routine; call the format init routine; + * start up the directory modification time and access mode database + */ + if ((get_arc() < 0) || ((*frmt->options)() < 0) || + ((*frmt->st_rd)() < 0) || (dir_start() < 0)) + return; + + /* + * When we are doing interactive rename, we store the mapping of names + * so we can fix up hard links files later in the archive. + */ + if (iflag && (name_start() < 0)) + return; + + now = time(NULL); + + /* + * step through each entry on the archive until the format read routine + * says it is done + */ + while (next_head(arcn) == 0) { + + /* + * check for pattern, and user specified options match. When + * all the patterns are matched we are done + */ + if ((res = pat_match(arcn)) < 0) + break; + + if ((res > 0) || (sel_chk(arcn) != 0)) { + /* + * file is not selected. skip past any file data and + * padding and go back for the next archive member + */ + (void)rd_skip(arcn->skip + arcn->pad); + continue; + } + + /* + * with -u or -D only extract when the archive member is newer + * than the file with the same name in the file system (nos + * test of being the same type is required). + * NOTE: this test is done BEFORE name modifications as + * specified by pax. this operation can be confusing to the + * user who might expect the test to be done on an existing + * file AFTER the name mod. In honesty the pax spec is probably + * flawed in this respect. + */ + if ((uflag || Dflag) && ((lstat(arcn->name, &sb) == 0))) { + if (uflag && Dflag) { + if ((arcn->sb.st_mtime <= sb.st_mtime) && + (arcn->sb.st_ctime <= sb.st_ctime)) { + (void)rd_skip(arcn->skip + arcn->pad); + continue; + } + } else if (Dflag) { + if (arcn->sb.st_ctime <= sb.st_ctime) { + (void)rd_skip(arcn->skip + arcn->pad); + continue; + } + } else if (arcn->sb.st_mtime <= sb.st_mtime) { + (void)rd_skip(arcn->skip + arcn->pad); + continue; + } + } + + /* + * this archive member is now been selected. modify the name. + */ + if ((pat_sel(arcn) < 0) || ((res = mod_name(arcn)) < 0)) + break; + if (res > 0) { + /* + * a bad name mod, skip and purge name from link table + */ + purg_lnk(arcn); + (void)rd_skip(arcn->skip + arcn->pad); + continue; + } + + /* + * Non standard -Y and -Z flag. When the existing file is + * same age or newer skip + */ + if ((Yflag || Zflag) && ((lstat(arcn->name, &sb) == 0))) { + if (Yflag && Zflag) { + if ((arcn->sb.st_mtime <= sb.st_mtime) && + (arcn->sb.st_ctime <= sb.st_ctime)) { + (void)rd_skip(arcn->skip + arcn->pad); + continue; + } + } else if (Yflag) { + if (arcn->sb.st_ctime <= sb.st_ctime) { + (void)rd_skip(arcn->skip + arcn->pad); + continue; + } + } else if (arcn->sb.st_mtime <= sb.st_mtime) { + (void)rd_skip(arcn->skip + arcn->pad); + continue; + } + } + + if (vflag) { + if (vflag > 1) + ls_list(arcn, now, listf); + else { + (void)fputs(arcn->name, listf); + vfpart = 1; + } + } + + /* + * if required, chdir around. + */ + if ((arcn->pat != NULL) && (arcn->pat->chdname != NULL)) + if (chdir(arcn->pat->chdname) != 0) + syswarn(1, errno, "Cannot chdir to %s", + arcn->pat->chdname); + + /* + * all ok, extract this member based on type + */ + if ((arcn->type != PAX_REG) && (arcn->type != PAX_CTG)) { + /* + * process archive members that are not regular files. + * throw out padding and any data that might follow the + * header (as determined by the format). + */ + if ((arcn->type == PAX_HLK) || (arcn->type == PAX_HRG)) + res = lnk_creat(arcn); + else + res = node_creat(arcn); + + (void)rd_skip(arcn->skip + arcn->pad); + if (res < 0) + purg_lnk(arcn); + + if (vflag && vfpart) { + (void)putc('\n', listf); + vfpart = 0; + } + continue; + } + /* + * we have a file with data here. If we can not create it, skip + * over the data and purge the name from hard link table + */ + if ((fd = file_creat(arcn)) < 0) { + (void)rd_skip(arcn->skip + arcn->pad); + purg_lnk(arcn); + continue; + } + /* + * extract the file from the archive and skip over padding and + * any unprocessed data + */ + res = (*frmt->rd_data)(arcn, fd, &cnt); + file_close(arcn, fd); + if (vflag && vfpart) { + (void)putc('\n', listf); + vfpart = 0; + } + if (!res) + (void)rd_skip(cnt + arcn->pad); + + /* + * if required, chdir around. + */ + if ((arcn->pat != NULL) && (arcn->pat->chdname != NULL)) + if (fchdir(cwdfd) != 0) + syswarn(1, errno, + "Can't fchdir to starting directory"); + } + + /* + * all done, restore directory modes and times as required; make sure + * all patterns supplied by the user were matched; block off signals + * to avoid chance for multiple entry into the cleanup code. + */ + (void)(*frmt->end_rd)(); + (void)sigprocmask(SIG_BLOCK, &s_mask, NULL); + ar_close(); + proc_dir(); + pat_chk(); +} + +/* + * wr_archive() + * Write an archive. used in both creating a new archive and appends on + * previously written archive. + */ + +static void +wr_archive(ARCHD *arcn, int is_app) +{ + int res; + int hlk; + int wr_one; + off_t cnt; + int (*wrf)(ARCHD *); + int fd = -1; + time_t now; + + /* + * if this format supports hard link storage, start up the database + * that detects them. + */ + if (((hlk = frmt->hlk) == 1) && (lnk_start() < 0)) + return; + + /* + * start up the file traversal code and format specific write + */ + if ((ftree_start() < 0) || ((*frmt->st_wr)() < 0)) + return; + wrf = frmt->wr; + + /* + * When we are doing interactive rename, we store the mapping of names + * so we can fix up hard links files later in the archive. + */ + if (iflag && (name_start() < 0)) + return; + + /* + * if this not append, and there are no files, we do no write a trailer + */ + wr_one = is_app; + + now = time(NULL); + + /* + * while there are files to archive, process them one at at time + */ + while (next_file(arcn) == 0) { + /* + * check if this file meets user specified options match. + */ + if (sel_chk(arcn) != 0) { + ftree_notsel(); + continue; + } + fd = -1; + if (uflag) { + /* + * only archive if this file is newer than a file with + * the same name that is already stored on the archive + */ + if ((res = chk_ftime(arcn)) < 0) + break; + if (res > 0) + continue; + } + + /* + * this file is considered selected now. see if this is a hard + * link to a file already stored + */ + ftree_sel(arcn); + if (hlk && (chk_lnk(arcn) < 0)) + break; + + if ((arcn->type == PAX_REG) || (arcn->type == PAX_HRG) || + (arcn->type == PAX_CTG)) { + /* + * we will have to read this file. by opening it now we + * can avoid writing a header to the archive for a file + * we were later unable to read (we also purge it from + * the link table). + */ + if ((fd = open(arcn->org_name, O_RDONLY, 0)) < 0) { + syswarn(1,errno, "Unable to open %s to read", + arcn->org_name); + purg_lnk(arcn); + continue; + } + } + + /* + * Now modify the name as requested by the user + */ + if ((res = mod_name(arcn)) < 0) { + /* + * name modification says to skip this file, close the + * file and purge link table entry + */ + rdfile_close(arcn, &fd); + purg_lnk(arcn); + break; + } + + if ((res > 0) || (docrc && (set_crc(arcn, fd) < 0))) { + /* + * unable to obtain the crc we need, close the file, + * purge link table entry + */ + rdfile_close(arcn, &fd); + purg_lnk(arcn); + continue; + } + + if (vflag) { + if (vflag > 1) + ls_list(arcn, now, listf); + else { + (void)fputs(arcn->name, listf); + vfpart = 1; + } + } + ++flcnt; + + /* + * looks safe to store the file, have the format specific + * routine write routine store the file header on the archive + */ + if ((res = (*wrf)(arcn)) < 0) { + rdfile_close(arcn, &fd); + break; + } + wr_one = 1; + if (res > 0) { + /* + * format write says no file data needs to be stored + * so we are done messing with this file + */ + if (vflag && vfpart) { + (void)putc('\n', listf); + vfpart = 0; + } + rdfile_close(arcn, &fd); + continue; + } + + /* + * Add file data to the archive, quit on write error. if we + * cannot write the entire file contents to the archive we + * must pad the archive to replace the missing file data + * (otherwise during an extract the file header for the file + * which FOLLOWS this one will not be where we expect it to + * be). + */ + res = (*frmt->wr_data)(arcn, fd, &cnt); + rdfile_close(arcn, &fd); + if (vflag && vfpart) { + (void)putc('\n', listf); + vfpart = 0; + } + if (res < 0) + break; + + /* + * pad as required, cnt is number of bytes not written + */ + if (((cnt > 0) && (wr_skip(cnt) < 0)) || + ((arcn->pad > 0) && (wr_skip(arcn->pad) < 0))) + break; + } + + /* + * tell format to write trailer; pad to block boundary; reset directory + * mode/access times, and check if all patterns supplied by the user + * were matched. block off signals to avoid chance for multiple entry + * into the cleanup code + */ + if (wr_one) { + (*frmt->end_wr)(); + wr_fin(); + } + (void)sigprocmask(SIG_BLOCK, &s_mask, NULL); + ar_close(); + if (tflag) + proc_dir(); + ftree_chk(); +} + +/* + * append() + * Add file to previously written archive. Archive format specified by the + * user must agree with archive. The archive is read first to collect + * modification times (if -u) and locate the archive trailer. The archive + * is positioned in front of the record with the trailer and wr_archive() + * is called to add the new members. + * PAX IMPLEMENTATION DETAIL NOTE: + * -u is implemented by adding the new members to the end of the archive. + * Care is taken so that these do not end up as links to the older + * version of the same file already stored in the archive. It is expected + * when extraction occurs these newer versions will over-write the older + * ones stored "earlier" in the archive (this may be a bad assumption as + * it depends on the implementation of the program doing the extraction). + * It is really difficult to splice in members without either re-writing + * the entire archive (from the point were the old version was), or having + * assistance of the format specification in terms of a special update + * header that invalidates a previous archive record. The POSIX spec left + * the method used to implement -u unspecified. This pax is able to + * over write existing files that it creates. + */ + +void +append(void) +{ + ARCHD *arcn; + int res; + ARCHD archd; + FSUB *orgfrmt; + int udev; + off_t tlen; + + arcn = &archd; + orgfrmt = frmt; + + /* + * Do not allow an append operation if the actual archive is of a + * different format than the user specified format. + */ + if (get_arc() < 0) + return; + if ((orgfrmt != NULL) && (orgfrmt != frmt)) { + paxwarn(1, "Cannot mix current archive format %s with %s", + frmt->name, orgfrmt->name); + return; + } + + /* + * pass the format any options and start up format + */ + if (((*frmt->options)() < 0) || ((*frmt->st_rd)() < 0)) + return; + + /* + * if we only are adding members that are newer, we need to save the + * mod times for all files we see. + */ + if (uflag && (ftime_start() < 0)) + return; + + /* + * some archive formats encode hard links by recording the device and + * file serial number (inode) but copy the file anyway (multiple times) + * to the archive. When we append, we run the risk that newly added + * files may have the same device and inode numbers as those recorded + * on the archive but during a previous run. If this happens, when the + * archive is extracted we get INCORRECT hard links. We avoid this by + * remapping the device numbers so that newly added files will never + * use the same device number as one found on the archive. remapping + * allows new members to safely have links among themselves. remapping + * also avoids problems with file inode (serial number) truncations + * when the inode number is larger than storage space in the archive + * header. See the remap routines for more details. + */ + if ((udev = frmt->udev) && (dev_start() < 0)) + return; + + /* + * reading the archive may take a long time. If verbose tell the user + */ + if (vflag) { + (void)fprintf(listf, + "%s: Reading archive to position at the end...", argv0); + vfpart = 1; + } + + /* + * step through the archive until the format says it is done + */ + while (next_head(arcn) == 0) { + /* + * check if this file meets user specified options. + */ + if (sel_chk(arcn) != 0) { + if (rd_skip(arcn->skip + arcn->pad) == 1) + break; + continue; + } + + if (uflag) { + /* + * see if this is the newest version of this file has + * already been seen, if so skip. + */ + if ((res = chk_ftime(arcn)) < 0) + break; + if (res > 0) { + if (rd_skip(arcn->skip + arcn->pad) == 1) + break; + continue; + } + } + + /* + * Store this device number. Device numbers seen during the + * read phase of append will cause newly appended files with a + * device number seen in the old part of the archive to be + * remapped to an unused device number. + */ + if ((udev && (add_dev(arcn) < 0)) || + (rd_skip(arcn->skip + arcn->pad) == 1)) + break; + } + + /* + * done, finish up read and get the number of bytes to back up so we + * can add new members. The format might have used the hard link table, + * purge it. + */ + tlen = (*frmt->end_rd)(); + lnk_end(); + + /* + * try to position for write, if this fails quit. if any error occurs, + * we will refuse to write + */ + if (appnd_start(tlen) < 0) + return; + + /* + * tell the user we are done reading. + */ + if (vflag && vfpart) { + (void)fputs("done.\n", listf); + vfpart = 0; + } + + /* + * go to the writing phase to add the new members + */ + wr_archive(arcn, 1); +} + +/* + * archive() + * write a new archive + */ + +void +archive(void) +{ + ARCHD archd; + + /* + * if we only are adding members that are newer, we need to save the + * mod times for all files; set up for writing; pass the format any + * options write the archive + */ + if ((uflag && (ftime_start() < 0)) || (wr_start() < 0)) + return; + if ((*frmt->options)() < 0) + return; + + wr_archive(&archd, 0); +} + +/* + * copy() + * copy files from one part of the file system to another. this does not + * use any archive storage. The EFFECT OF THE COPY IS THE SAME as if an + * archive was written and then extracted in the destination directory + * (except the files are forced to be under the destination directory). + */ + +void +copy(void) +{ + ARCHD *arcn; + int res; + int fddest; + char *dest_pt; + int dlen; + int drem; + int fdsrc = -1; + struct stat sb; + ARCHD archd; + char dirbuf[PAXPATHLEN+1]; + + arcn = &archd; + /* + * set up the destination dir path and make sure it is a directory. We + * make sure we have a trailing / on the destination + */ + dlen = l_strncpy(dirbuf, dirptr, sizeof(dirbuf) - 1); + dest_pt = dirbuf + dlen; + if (*(dest_pt-1) != '/') { + *dest_pt++ = '/'; + ++dlen; + } + *dest_pt = '\0'; + drem = PAXPATHLEN - dlen; + + if (stat(dirptr, &sb) < 0) { + syswarn(1, errno, "Cannot access destination directory %s", + dirptr); + return; + } + if (!S_ISDIR(sb.st_mode)) { + paxwarn(1, "Destination is not a directory %s", dirptr); + return; + } + + /* + * start up the hard link table; file traversal routines and the + * modification time and access mode database + */ + if ((lnk_start() < 0) || (ftree_start() < 0) || (dir_start() < 0)) + return; + + /* + * When we are doing interactive rename, we store the mapping of names + * so we can fix up hard links files later in the archive. + */ + if (iflag && (name_start() < 0)) + return; + + /* + * set up to cp file trees + */ + cp_start(); + + /* + * while there are files to archive, process them + */ + while (next_file(arcn) == 0) { + fdsrc = -1; + + /* + * check if this file meets user specified options + */ + if (sel_chk(arcn) != 0) { + ftree_notsel(); + continue; + } + + /* + * if there is already a file in the destination directory with + * the same name and it is newer, skip the one stored on the + * archive. + * NOTE: this test is done BEFORE name modifications as + * specified by pax. this can be confusing to the user who + * might expect the test to be done on an existing file AFTER + * the name mod. In honesty the pax spec is probably flawed in + * this respect + */ + if (uflag || Dflag) { + /* + * create the destination name + */ + if (*(arcn->name) == '/') + res = 1; + else + res = 0; + if ((arcn->nlen - res) > drem) { + paxwarn(1, "Destination pathname too long %s", + arcn->name); + continue; + } + (void)strncpy(dest_pt, arcn->name + res, drem); + dirbuf[PAXPATHLEN] = '\0'; + + /* + * if existing file is same age or newer skip + */ + res = lstat(dirbuf, &sb); + *dest_pt = '\0'; + + if (res == 0) { + if (uflag && Dflag) { + if ((arcn->sb.st_mtime<=sb.st_mtime) && + (arcn->sb.st_ctime<=sb.st_ctime)) + continue; + } else if (Dflag) { + if (arcn->sb.st_ctime <= sb.st_ctime) + continue; + } else if (arcn->sb.st_mtime <= sb.st_mtime) + continue; + } + } + + /* + * this file is considered selected. See if this is a hard link + * to a previous file; modify the name as requested by the + * user; set the final destination. + */ + ftree_sel(arcn); + if ((chk_lnk(arcn) < 0) || ((res = mod_name(arcn)) < 0)) + break; + if ((res > 0) || (set_dest(arcn, dirbuf, dlen) < 0)) { + /* + * skip file, purge from link table + */ + purg_lnk(arcn); + continue; + } + + /* + * Non standard -Y and -Z flag. When the exisiting file is + * same age or newer skip + */ + if ((Yflag || Zflag) && ((lstat(arcn->name, &sb) == 0))) { + if (Yflag && Zflag) { + if ((arcn->sb.st_mtime <= sb.st_mtime) && + (arcn->sb.st_ctime <= sb.st_ctime)) + continue; + } else if (Yflag) { + if (arcn->sb.st_ctime <= sb.st_ctime) + continue; + } else if (arcn->sb.st_mtime <= sb.st_mtime) + continue; + } + + if (vflag) { + (void)fputs(arcn->name, listf); + vfpart = 1; + } + ++flcnt; + + /* + * try to create a hard link to the src file if requested + * but make sure we are not trying to overwrite ourselves. + */ + if (lflag) + res = cross_lnk(arcn); + else + res = chk_same(arcn); + if (res <= 0) { + if (vflag && vfpart) { + (void)putc('\n', listf); + vfpart = 0; + } + continue; + } + + /* + * have to create a new file + */ + if ((arcn->type != PAX_REG) && (arcn->type != PAX_CTG)) { + /* + * create a link or special file + */ + if ((arcn->type == PAX_HLK) || (arcn->type == PAX_HRG)) + res = lnk_creat(arcn); + else + res = node_creat(arcn); + if (res < 0) + purg_lnk(arcn); + if (vflag && vfpart) { + (void)putc('\n', listf); + vfpart = 0; + } + continue; + } + + /* + * have to copy a regular file to the destination directory. + * first open source file and then create the destination file + */ + if ((fdsrc = open(arcn->org_name, O_RDONLY, 0)) < 0) { + syswarn(1, errno, "Unable to open %s to read", + arcn->org_name); + purg_lnk(arcn); + continue; + } + if ((fddest = file_creat(arcn)) < 0) { + rdfile_close(arcn, &fdsrc); + purg_lnk(arcn); + continue; + } + + /* + * copy source file data to the destination file + */ + cp_file(arcn, fdsrc, fddest); + file_close(arcn, fddest); + rdfile_close(arcn, &fdsrc); + + if (vflag && vfpart) { + (void)putc('\n', listf); + vfpart = 0; + } + } + + /* + * restore directory modes and times as required; make sure all + * patterns were selected block off signals to avoid chance for + * multiple entry into the cleanup code. + */ + (void)sigprocmask(SIG_BLOCK, &s_mask, NULL); + ar_close(); + proc_dir(); + ftree_chk(); +} + +/* + * next_head() + * try to find a valid header in the archive. Uses format specific + * routines to extract the header and id the trailer. Trailers may be + * located within a valid header or in an invalid header (the location + * is format specific. The inhead field from the option table tells us + * where to look for the trailer). + * We keep reading (and resyncing) until we get enough contiguous data + * to check for a header. If we cannot find one, we shift by a byte + * add a new byte from the archive to the end of the buffer and try again. + * If we get a read error, we throw out what we have (as we must have + * contiguous data) and start over again. + * ASSUMED: headers fit within a BLKMULT header. + * Return: + * 0 if we got a header, -1 if we are unable to ever find another one + * (we reached the end of input, or we reached the limit on retries. see + * the specs for rd_wrbuf() for more details) + */ + +static int +next_head(ARCHD *arcn) +{ + int ret; + char *hdend; + int res; + int shftsz; + int hsz; + int in_resync = 0; /* set when we are in resync mode */ + int cnt = 0; /* counter for trailer function */ + int first = 1; /* on 1st read, EOF isn't premature. */ + + /* + * set up initial conditions, we want a whole frmt->hsz block as we + * have no data yet. + */ + res = hsz = frmt->hsz; + hdend = hdbuf; + shftsz = hsz - 1; + for(;;) { + /* + * keep looping until we get a contiguous FULL buffer + * (frmt->hsz is the proper size) + */ + for (;;) { + if ((ret = rd_wrbuf(hdend, res)) == res) + break; + + /* + * If we read 0 bytes (EOF) from an archive when we + * expect to find a header, we have stepped upon + * an archive without the customary block of zeroes + * end marker. It's just stupid to error out on + * them, so exit gracefully. + */ + if (first && ret == 0) + return(-1); + first = 0; + + /* + * some kind of archive read problem, try to resync the + * storage device, better give the user the bad news. + */ + if ((ret == 0) || (rd_sync() < 0)) { + paxwarn(1,"Premature end of file on archive read"); + return(-1); + } + if (!in_resync) { + if (act == APPND) { + paxwarn(1, + "Archive I/O error, cannot continue"); + return(-1); + } + paxwarn(1,"Archive I/O error. Trying to recover."); + ++in_resync; + } + + /* + * oh well, throw it all out and start over + */ + res = hsz; + hdend = hdbuf; + } + + /* + * ok we have a contiguous buffer of the right size. Call the + * format read routine. If this was not a valid header and this + * format stores trailers outside of the header, call the + * format specific trailer routine to check for a trailer. We + * have to watch out that we do not mis-identify file data or + * block padding as a header or trailer. Format specific + * trailer functions must NOT check for the trailer while we + * are running in resync mode. Some trailer functions may tell + * us that this block cannot contain a valid header either, so + * we then throw out the entire block and start over. + */ + if ((*frmt->rd)(arcn, hdbuf) == 0) + break; + + if (!frmt->inhead) { + /* + * this format has trailers outside of valid headers + */ + if ((ret = (*frmt->trail_tar)(hdbuf,in_resync,&cnt)) == 0){ + /* + * valid trailer found, drain input as required + */ + ar_drain(); + return(-1); + } + + if (ret == 1) { + /* + * we are in resync and we were told to throw + * the whole block out because none of the + * bytes in this block can be used to form a + * valid header + */ + res = hsz; + hdend = hdbuf; + continue; + } + } + + /* + * Brute force section. + * not a valid header. We may be able to find a header yet. So + * we shift over by one byte, and set up to read one byte at a + * time from the archive and place it at the end of the buffer. + * We will keep moving byte at a time until we find a header or + * get a read error and have to start over. + */ + if (!in_resync) { + if (act == APPND) { + paxwarn(1,"Unable to append, archive header flaw"); + return(-1); + } + paxwarn(1,"Invalid header, starting valid header search."); + ++in_resync; + } + memmove(hdbuf, hdbuf+1, shftsz); + res = 1; + hdend = hdbuf + shftsz; + } + + /* + * ok got a valid header, check for trailer if format encodes it in the + * the header. + */ + if (frmt->inhead && ((*frmt->trail_cpio)(arcn) == 0)) { + /* + * valid trailer found, drain input as required + */ + ar_drain(); + return(-1); + } + + ++flcnt; + return(0); +} + +/* + * get_arc() + * Figure out what format an archive is. Handles archive with flaws by + * brute force searches for a legal header in any supported format. The + * format id routines have to be careful to NOT mis-identify a format. + * ASSUMED: headers fit within a BLKMULT header. + * Return: + * 0 if archive found -1 otherwise + */ + +static int +get_arc(void) +{ + int i; + int hdsz = 0; + int res; + int minhd = BLKMULT; + char *hdend; + int notice = 0; + + /* + * find the smallest header size in all archive formats and then set up + * to read the archive. + */ + for (i = 0; ford[i] >= 0; ++i) { + if (fsub[ford[i]].hsz < minhd) + minhd = fsub[ford[i]].hsz; + } + if (rd_start() < 0) + return(-1); + res = BLKMULT; + hdsz = 0; + hdend = hdbuf; + for(;;) { + for (;;) { + /* + * fill the buffer with at least the smallest header + */ + i = rd_wrbuf(hdend, res); + if (i > 0) + hdsz += i; + if (hdsz >= minhd) + break; + + /* + * if we cannot recover from a read error quit + */ + if ((i == 0) || (rd_sync() < 0)) + goto out; + + /* + * when we get an error none of the data we already + * have can be used to create a legal header (we just + * got an error in the middle), so we throw it all out + * and refill the buffer with fresh data. + */ + res = BLKMULT; + hdsz = 0; + hdend = hdbuf; + if (!notice) { + if (act == APPND) + return(-1); + paxwarn(1,"Cannot identify format. Searching..."); + ++notice; + } + } + + /* + * we have at least the size of the smallest header in any + * archive format. Look to see if we have a match. The array + * ford[] is used to specify the header id order to reduce the + * chance of incorrectly id'ing a valid header (some formats + * may be subsets of each other and the order would then be + * important). + */ + for (i = 0; ford[i] >= 0; ++i) { + if ((*fsub[ford[i]].id)(hdbuf, hdsz) < 0) + continue; + frmt = &(fsub[ford[i]]); + /* + * yuck, to avoid slow special case code in the extract + * routines, just push this header back as if it was + * not seen. We have left extra space at start of the + * buffer for this purpose. This is a bit ugly, but + * adding all the special case code is far worse. + */ + pback(hdbuf, hdsz); + return(0); + } + + /* + * We have a flawed archive, no match. we start searching, but + * we never allow additions to flawed archives + */ + if (!notice) { + if (act == APPND) + return(-1); + paxwarn(1, "Cannot identify format. Searching..."); + ++notice; + } + + /* + * brute force search for a header that we can id. + * we shift through byte at a time. this is slow, but we cannot + * determine the nature of the flaw in the archive in a + * portable manner + */ + if (--hdsz > 0) { + memmove(hdbuf, hdbuf+1, hdsz); + res = BLKMULT - hdsz; + hdend = hdbuf + hdsz; + } else { + res = BLKMULT; + hdend = hdbuf; + hdsz = 0; + } + } + + out: + /* + * we cannot find a header, bow, apologize and quit + */ + paxwarn(1, "Sorry, unable to determine archive format."); + return(-1); +} diff --git a/commands/pax/buf_subs.c b/commands/pax/buf_subs.c new file mode 100644 index 000000000..0eb53727d --- /dev/null +++ b/commands/pax/buf_subs.c @@ -0,0 +1,993 @@ +/*- + * Copyright (c) 1992 Keith Muller. + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Keith Muller of the University of California, San Diego. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef lint +#if 0 +static char sccsid[] = "@(#)buf_subs.c 8.2 (Berkeley) 4/18/94"; +#endif +#endif /* not lint */ + +#include +#include +#include +#include +#include +#include +#include +#include "pax.h" +#include "extern.h" + +/* + * routines which implement archive and file buffering + */ + +#define MINFBSZ 512 /* default block size for hole detect */ +#define MAXFLT 10 /* default media read error limit */ + +/* + * Need to change bufmem to dynamic allocation when the upper + * limit on blocking size is removed (though that will violate pax spec) + * MAXBLK define and tests will also need to be updated. + */ +static char bufmem[MAXBLK+BLKMULT]; /* i/o buffer + pushback id space */ +static char *buf; /* normal start of i/o buffer */ +static char *bufend; /* end or last char in i/o buffer */ +static char *bufpt; /* read/write point in i/o buffer */ +int blksz = MAXBLK; /* block input/output size in bytes */ +int wrblksz; /* user spec output size in bytes */ +int maxflt = MAXFLT; /* MAX consecutive media errors */ +int rdblksz; /* first read blksize (tapes only) */ +off_t wrlimit; /* # of bytes written per archive vol */ +off_t wrcnt; /* # of bytes written on current vol */ +off_t rdcnt; /* # of bytes read on current vol */ + +/* + * wr_start() + * set up the buffering system to operate in a write mode + * Return: + * 0 if ok, -1 if the user specified write block size violates pax spec + */ + +int +wr_start(void) +{ + buf = &(bufmem[BLKMULT]); + /* + * Check to make sure the write block size meets pax specs. If the user + * does not specify a blocksize, we use the format default blocksize. + * We must be picky on writes, so we do not allow the user to create an + * archive that might be hard to read elsewhere. If all ok, we then + * open the first archive volume + */ + if (!wrblksz) + wrblksz = frmt->bsz; + if (wrblksz > MAXBLK) { + paxwarn(1, "Write block size of %d too large, maximum is: %d", + wrblksz, MAXBLK); + return(-1); + } + if (wrblksz % BLKMULT) { + paxwarn(1, "Write block size of %d is not a %d byte multiple", + wrblksz, BLKMULT); + return(-1); + } + if (wrblksz > MAXBLK_POSIX) { + paxwarn(0, "Write block size of %d larger than POSIX max %d, archive may not be portable", + wrblksz, MAXBLK_POSIX); + return(-1); + } + + /* + * we only allow wrblksz to be used with all archive operations + */ + blksz = rdblksz = wrblksz; + if ((ar_open(arcname) < 0) && (ar_next() < 0)) + return(-1); + wrcnt = 0; + bufend = buf + wrblksz; + bufpt = buf; + return(0); +} + +/* + * rd_start() + * set up buffering system to read an archive + * Return: + * 0 if ok, -1 otherwise + */ + +int +rd_start(void) +{ + /* + * leave space for the header pushback (see get_arc()). If we are + * going to append and user specified a write block size, check it + * right away + */ + buf = &(bufmem[BLKMULT]); + if ((act == APPND) && wrblksz) { + if (wrblksz > MAXBLK) { + paxwarn(1,"Write block size %d too large, maximum is: %d", + wrblksz, MAXBLK); + return(-1); + } + if (wrblksz % BLKMULT) { + paxwarn(1, "Write block size %d is not a %d byte multiple", + wrblksz, BLKMULT); + return(-1); + } + } + + /* + * open the archive + */ + if ((ar_open(arcname) < 0) && (ar_next() < 0)) + return(-1); + bufend = buf + rdblksz; + bufpt = bufend; + rdcnt = 0; + return(0); +} + +/* + * cp_start() + * set up buffer system for copying within the file system + */ + +void +cp_start(void) +{ + buf = &(bufmem[BLKMULT]); + rdblksz = blksz = MAXBLK; +} + +/* + * appnd_start() + * Set up the buffering system to append new members to an archive that + * was just read. The last block(s) of an archive may contain a format + * specific trailer. To append a new member, this trailer has to be + * removed from the archive. The first byte of the trailer is replaced by + * the start of the header of the first file added to the archive. The + * format specific end read function tells us how many bytes to move + * backwards in the archive to be positioned BEFORE the trailer. Two + * different postions have to be adjusted, the O.S. file offset (e.g. the + * position of the tape head) and the write point within the data we have + * stored in the read (soon to become write) buffer. We may have to move + * back several records (the number depends on the size of the archive + * record and the size of the format trailer) to read up the record where + * the first byte of the trailer is recorded. Trailers may span (and + * overlap) record boundries. + * We first calculate which record has the first byte of the trailer. We + * move the OS file offset back to the start of this record and read it + * up. We set the buffer write pointer to be at this byte (the byte where + * the trailer starts). We then move the OS file pointer back to the + * start of this record so a flush of this buffer will replace the record + * in the archive. + * A major problem is rewriting this last record. For archives stored + * on disk files, this is trival. However, many devices are really picky + * about the conditions under which they will allow a write to occur. + * Often devices restrict the conditions where writes can be made writes, + * so it may not be feasable to append archives stored on all types of + * devices. + * Return: + * 0 for success, -1 for failure + */ + +int +appnd_start(off_t skcnt) +{ + int res; + off_t cnt; + + if (exit_val != 0) { + paxwarn(0, "Cannot append to an archive that may have flaws."); + return(-1); + } + /* + * if the user did not specify a write blocksize, inherit the size used + * in the last archive volume read. (If a is set we still use rdblksz + * until next volume, cannot shift sizes within a single volume). + */ + if (!wrblksz) + wrblksz = blksz = rdblksz; + else + blksz = rdblksz; + + /* + * make sure that this volume allows appends + */ + if (ar_app_ok() < 0) + return(-1); + + /* + * Calculate bytes to move back and move in front of record where we + * need to start writing from. Remember we have to add in any padding + * that might be in the buffer after the trailer in the last block. We + * travel skcnt + padding ROUNDED UP to blksize. + */ + skcnt += bufend - bufpt; + if ((cnt = (skcnt/blksz) * blksz) < skcnt) + cnt += blksz; + if (ar_rev((off_t)cnt) < 0) + goto out; + + /* + * We may have gone too far if there is valid data in the block we are + * now in front of, read up the block and position the pointer after + * the valid data. + */ + if ((cnt -= skcnt) > 0) { + /* + * watch out for stupid tape drives. ar_rev() will set rdblksz + * to be real physical blocksize so we must loop until we get + * the old rdblksz (now in blksz). If ar_rev() fouls up the + * determination of the physical block size, we will fail. + */ + bufpt = buf; + bufend = buf + blksz; + while (bufpt < bufend) { + if ((res = ar_read(bufpt, rdblksz)) <= 0) + goto out; + bufpt += res; + } + if (ar_rev((off_t)(bufpt - buf)) < 0) + goto out; + bufpt = buf + cnt; + bufend = buf + blksz; + } else { + /* + * buffer is empty + */ + bufend = buf + blksz; + bufpt = buf; + } + rdblksz = blksz; + rdcnt -= skcnt; + wrcnt = 0; + + /* + * At this point we are ready to write. If the device requires special + * handling to write at a point were previously recorded data resides, + * that is handled in ar_set_wr(). From now on we operate under normal + * ARCHIVE mode (write) conditions + */ + if (ar_set_wr() < 0) + return(-1); + act = ARCHIVE; + return(0); + + out: + paxwarn(1, "Unable to rewrite archive trailer, cannot append."); + return(-1); +} + +/* + * rd_sync() + * A read error occurred on this archive volume. Resync the buffer and + * try to reset the device (if possible) so we can continue to read. Keep + * trying to do this until we get a valid read, or we reach the limit on + * consecutive read faults (at which point we give up). The user can + * adjust the read error limit through a command line option. + * Returns: + * 0 on success, and -1 on failure + */ + +int +rd_sync(void) +{ + int errcnt = 0; + int res; + + /* + * if the user says bail out on first fault, we are out of here... + */ + if (maxflt == 0) + return(-1); + if (act == APPND) { + paxwarn(1, "Unable to append when there are archive read errors."); + return(-1); + } + + /* + * poke at device and try to get past media error + */ + if (ar_rdsync() < 0) { + if (ar_next() < 0) + return(-1); + else + rdcnt = 0; + } + + for (;;) { + if ((res = ar_read(buf, blksz)) > 0) { + /* + * All right! got some data, fill that buffer + */ + bufpt = buf; + bufend = buf + res; + rdcnt += res; + return(0); + } + + /* + * Oh well, yet another failed read... + * if error limit reached, ditch. o.w. poke device to move past + * bad media and try again. if media is badly damaged, we ask + * the poor (and upset user at this point) for the next archive + * volume. remember the goal on reads is to get the most we + * can extract out of the archive. + */ + if ((maxflt > 0) && (++errcnt > maxflt)) + paxwarn(0,"Archive read error limit (%d) reached",maxflt); + else if (ar_rdsync() == 0) + continue; + if (ar_next() < 0) + break; + rdcnt = 0; + errcnt = 0; + } + return(-1); +} + +/* + * pback() + * push the data used during the archive id phase back into the I/O + * buffer. This is required as we cannot be sure that the header does NOT + * overlap a block boundry (as in the case we are trying to recover a + * flawed archived). This was not designed to be used for any other + * purpose. (What software engineering, HA!) + * WARNING: do not even THINK of pback greater than BLKMULT, unless the + * pback space is increased. + */ + +void +pback(char *pt, int cnt) +{ + bufpt -= cnt; + memcpy(bufpt, pt, cnt); + return; +} + +/* + * rd_skip() + * skip foward in the archive during an archive read. Used to get quickly + * past file data and padding for files the user did NOT select. + * Return: + * 0 if ok, -1 failure, and 1 when EOF on the archive volume was detected. + */ + +int +rd_skip(off_t skcnt) +{ + off_t res; + off_t cnt; + off_t skipped = 0; + + /* + * consume what data we have in the buffer. If we have to move foward + * whole records, we call the low level skip function to see if we can + * move within the archive without doing the expensive reads on data we + * do not want. + */ + if (skcnt == 0) + return(0); + res = MIN((bufend - bufpt), skcnt); + bufpt += res; + skcnt -= res; + + /* + * if skcnt is now 0, then no additional i/o is needed + */ + if (skcnt == 0) + return(0); + + /* + * We have to read more, calculate complete and partial record reads + * based on rdblksz. we skip over "cnt" complete records + */ + res = skcnt%rdblksz; + cnt = (skcnt/rdblksz) * rdblksz; + + /* + * if the skip fails, we will have to resync. ar_fow will tell us + * how much it can skip over. We will have to read the rest. + */ + if (ar_fow(cnt, &skipped) < 0) + return(-1); + res += cnt - skipped; + rdcnt += skipped; + + /* + * what is left we have to read (which may be the whole thing if + * ar_fow() told us the device can only read to skip records); + */ + while (res > 0L) { + cnt = bufend - bufpt; + /* + * if the read fails, we will have to resync + */ + if ((cnt <= 0) && ((cnt = buf_fill()) < 0)) + return(-1); + if (cnt == 0) + return(1); + cnt = MIN(cnt, res); + bufpt += cnt; + res -= cnt; + } + return(0); +} + +/* + * wr_fin() + * flush out any data (and pad if required) the last block. We always pad + * with zero (even though we do not have to). Padding with 0 makes it a + * lot easier to recover if the archive is damaged. zero paddding SHOULD + * BE a requirement.... + */ + +void +wr_fin(void) +{ + if (bufpt > buf) { + memset(bufpt, 0, bufend - bufpt); + bufpt = bufend; + (void)buf_flush(blksz); + } +} + +/* + * wr_rdbuf() + * fill the write buffer from data passed to it in a buffer (usually used + * by format specific write routines to pass a file header). On failure we + * punt. We do not allow the user to continue to write flawed archives. + * We assume these headers are not very large (the memory copy we use is + * a bit expensive). + * Return: + * 0 if buffer was filled ok, -1 o.w. (buffer flush failure) + */ + +int +wr_rdbuf(char *out, int outcnt) +{ + int cnt; + + /* + * while there is data to copy copy into the write buffer. when the + * write buffer fills, flush it to the archive and continue + */ + while (outcnt > 0) { + cnt = bufend - bufpt; + if ((cnt <= 0) && ((cnt = buf_flush(blksz)) < 0)) + return(-1); + /* + * only move what we have space for + */ + cnt = MIN(cnt, outcnt); + memcpy(bufpt, out, cnt); + bufpt += cnt; + out += cnt; + outcnt -= cnt; + } + return(0); +} + +/* + * rd_wrbuf() + * copy from the read buffer into a supplied buffer a specified number of + * bytes. If the read buffer is empty fill it and continue to copy. + * usually used to obtain a file header for processing by a format + * specific read routine. + * Return + * number of bytes copied to the buffer, 0 indicates EOF on archive volume, + * -1 is a read error + */ + +int +rd_wrbuf(char *in, int cpcnt) +{ + int res; + int cnt; + int incnt = cpcnt; + + /* + * loop until we fill the buffer with the requested number of bytes + */ + while (incnt > 0) { + cnt = bufend - bufpt; + if ((cnt <= 0) && ((cnt = buf_fill()) <= 0)) { + /* + * read error, return what we got (or the error if + * no data was copied). The caller must know that an + * error occured and has the best knowledge what to + * do with it + */ + if ((res = cpcnt - incnt) > 0) + return(res); + return(cnt); + } + + /* + * calculate how much data to copy based on whats left and + * state of buffer + */ + cnt = MIN(cnt, incnt); + memcpy(in, bufpt, cnt); + bufpt += cnt; + incnt -= cnt; + in += cnt; + } + return(cpcnt); +} + +/* + * wr_skip() + * skip forward during a write. In other words add padding to the file. + * we add zero filled padding as it makes flawed archives much easier to + * recover from. the caller tells us how many bytes of padding to add + * This routine was not designed to add HUGE amount of padding, just small + * amounts (a few 512 byte blocks at most) + * Return: + * 0 if ok, -1 if there was a buf_flush failure + */ + +int +wr_skip(off_t skcnt) +{ + int cnt; + + /* + * loop while there is more padding to add + */ + while (skcnt > 0L) { + cnt = bufend - bufpt; + if ((cnt <= 0) && ((cnt = buf_flush(blksz)) < 0)) + return(-1); + cnt = MIN(cnt, skcnt); + memset(bufpt, 0, cnt); + bufpt += cnt; + skcnt -= cnt; + } + return(0); +} + +/* + * wr_rdfile() + * fill write buffer with the contents of a file. We are passed an open + * file descriptor to the file and the archive structure that describes the + * file we are storing. The variable "left" is modified to contain the + * number of bytes of the file we were NOT able to write to the archive. + * it is important that we always write EXACTLY the number of bytes that + * the format specific write routine told us to. The file can also get + * bigger, so reading to the end of file would create an improper archive, + * we just detect this case and warn the user. We never create a bad + * archive if we can avoid it. Of course trying to archive files that are + * active is asking for trouble. It we fail, we pass back how much we + * could NOT copy and let the caller deal with it. + * Return: + * 0 ok, -1 if archive write failure. a short read of the file returns a + * 0, but "left" is set to be greater than zero. + */ + +int +wr_rdfile(ARCHD *arcn, int ifd, off_t *left) +{ + int cnt; + int res = 0; + off_t size = arcn->sb.st_size; + struct stat sb; + + /* + * while there are more bytes to write + */ + while (size > 0L) { + cnt = bufend - bufpt; + if ((cnt <= 0) && ((cnt = buf_flush(blksz)) < 0)) { + *left = size; + return(-1); + } + cnt = MIN(cnt, size); + if ((res = read(ifd, bufpt, cnt)) <= 0) + break; + size -= res; + bufpt += res; + } + + /* + * better check the file did not change during this operation + * or the file read failed. + */ + if (res < 0) + syswarn(1, errno, "Read fault on %s", arcn->org_name); + else if (size != 0L) + paxwarn(1, "File changed size during read %s", arcn->org_name); + else if (fstat(ifd, &sb) < 0) + syswarn(1, errno, "Failed stat on %s", arcn->org_name); + else if (arcn->sb.st_mtime != sb.st_mtime) + paxwarn(1, "File %s was modified during copy to archive", + arcn->org_name); + *left = size; + return(0); +} + +/* + * rd_wrfile() + * extract the contents of a file from the archive. If we are unable to + * extract the entire file (due to failure to write the file) we return + * the numbers of bytes we did NOT process. This way the caller knows how + * many bytes to skip past to find the next archive header. If the failure + * was due to an archive read, we will catch that when we try to skip. If + * the format supplies a file data crc value, we calculate the actual crc + * so that it can be compared to the value stored in the header + * NOTE: + * We call a special function to write the file. This function attempts to + * restore file holes (blocks of zeros) into the file. When files are + * sparse this saves space, and is a LOT faster. For non sparse files + * the performance hit is small. As of this writing, no archive supports + * information on where the file holes are. + * Return: + * 0 ok, -1 if archive read failure. if we cannot write the entire file, + * we return a 0 but "left" is set to be the amount unwritten + */ + +int +rd_wrfile(ARCHD *arcn, int ofd, off_t *left) +{ + int cnt = 0; + off_t size = arcn->sb.st_size; + int res = 0; + char *fnm = arcn->name; + int isem = 1; + int rem; + int sz = MINFBSZ; + struct stat sb; + u_long crc = 0L; + + /* + * pass the blocksize of the file being written to the write routine, + * if the size is zero, use the default MINFBSZ + */ + if (fstat(ofd, &sb) == 0) { +#if 0 + /* not under minix */ + if (sb.st_blksize > 0) + sz = (int)sb.st_blksize; +#endif + } else + syswarn(0,errno,"Unable to obtain block size for file %s",fnm); + rem = sz; + *left = 0L; + + /* + * Copy the archive to the file the number of bytes specified. We have + * to assume that we want to recover file holes as none of the archive + * formats can record the location of file holes. + */ + while (size > 0L) { + cnt = bufend - bufpt; + /* + * if we get a read error, we do not want to skip, as we may + * miss a header, so we do not set left, but if we get a write + * error, we do want to skip over the unprocessed data. + */ + if ((cnt <= 0) && ((cnt = buf_fill()) <= 0)) + break; + cnt = MIN(cnt, size); + if ((res = file_write(ofd,bufpt,cnt,&rem,&isem,sz,fnm)) <= 0) { + *left = size; + break; + } + + if (docrc) { + /* + * update the actual crc value + */ + cnt = res; + while (--cnt >= 0) + crc += *bufpt++ & 0xff; + } else + bufpt += res; + size -= res; + } + + /* + * if the last block has a file hole (all zero), we must make sure this + * gets updated in the file. We force the last block of zeros to be + * written. just closing with the file offset moved forward may not put + * a hole at the end of the file. + */ + if (isem && (arcn->sb.st_size > 0L)) + file_flush(ofd, fnm, isem); + + /* + * if we failed from archive read, we do not want to skip + */ + if ((size > 0L) && (*left == 0L)) + return(-1); + + /* + * some formats record a crc on file data. If so, then we compare the + * calculated crc to the crc stored in the archive + */ + if (docrc && (size == 0L) && (arcn->crc != crc)) + paxwarn(1,"Actual crc does not match expected crc %s",arcn->name); + return(0); +} + +/* + * cp_file() + * copy the contents of one file to another. used during -rw phase of pax + * just as in rd_wrfile() we use a special write function to write the + * destination file so we can properly copy files with holes. + */ + +void +cp_file(ARCHD *arcn, int fd1, int fd2) +{ + int cnt; + off_t cpcnt = 0L; + int res = 0; + char *fnm = arcn->name; + int no_hole = 0; + int isem = 1; + int rem; + int sz = MINFBSZ; + struct stat sb; + + /* + * check for holes in the source file. If none, we will use regular + * write instead of file write. + */ +#if 0 + /* not under minix */ + if (((off_t)(arcn->sb.st_blocks * BLKMULT)) >= arcn->sb.st_size) +#endif + ++no_hole; + + /* + * pass the blocksize of the file being written to the write routine, + * if the size is zero, use the default MINFBSZ + */ + if (fstat(fd2, &sb) == 0) { +#if 0 + /* not under minix */ + if (sb.st_blksize > 0) + sz = sb.st_blksize; +#endif + } else + syswarn(0,errno,"Unable to obtain block size for file %s",fnm); + rem = sz; + + /* + * read the source file and copy to destination file until EOF + */ + for(;;) { + if ((cnt = read(fd1, buf, blksz)) <= 0) + break; + if (no_hole) + res = write(fd2, buf, cnt); + else + res = file_write(fd2, buf, cnt, &rem, &isem, sz, fnm); + if (res != cnt) + break; + cpcnt += cnt; + } + + /* + * check to make sure the copy is valid. + */ + if (res < 0) + syswarn(1, errno, "Failed write during copy of %s to %s", + arcn->org_name, arcn->name); + else if (cpcnt != arcn->sb.st_size) + paxwarn(1, "File %s changed size during copy to %s", + arcn->org_name, arcn->name); + else if (fstat(fd1, &sb) < 0) + syswarn(1, errno, "Failed stat of %s", arcn->org_name); + else if (arcn->sb.st_mtime != sb.st_mtime) + paxwarn(1, "File %s was modified during copy to %s", + arcn->org_name, arcn->name); + + /* + * if the last block has a file hole (all zero), we must make sure this + * gets updated in the file. We force the last block of zeros to be + * written. just closing with the file offset moved forward may not put + * a hole at the end of the file. + */ + if (!no_hole && isem && (arcn->sb.st_size > 0L)) + file_flush(fd2, fnm, isem); + return; +} + +/* + * buf_fill() + * fill the read buffer with the next record (or what we can get) from + * the archive volume. + * Return: + * Number of bytes of data in the read buffer, -1 for read error, and + * 0 when finished (user specified termination in ar_next()). + */ + +int +buf_fill(void) +{ + int cnt; + static int fini = 0; + + if (fini) + return(0); + + for(;;) { + /* + * try to fill the buffer. on error the next archive volume is + * opened and we try again. + */ + if ((cnt = ar_read(buf, blksz)) > 0) { + bufpt = buf; + bufend = buf + cnt; + rdcnt += cnt; + return(cnt); + } + + /* + * errors require resync, EOF goes to next archive + */ + if (cnt < 0) + break; + if (ar_next() < 0) { + fini = 1; + return(0); + } + rdcnt = 0; + } + exit_val = 1; + return(-1); +} + +/* + * buf_flush() + * force the write buffer to the archive. We are passed the number of + * bytes in the buffer at the point of the flush. When we change archives + * the record size might change. (either larger or smaller). + * Return: + * 0 if all is ok, -1 when a write error occurs. + */ + +int +buf_flush(int bufcnt) +{ + int cnt; + int push = 0; + int totcnt = 0; + + /* + * if we have reached the user specified byte count for each archive + * volume, prompt for the next volume. (The non-standrad -R flag). + * NOTE: If the wrlimit is smaller than wrcnt, we will always write + * at least one record. We always round limit UP to next blocksize. + */ + if ((wrlimit > 0) && (wrcnt > wrlimit)) { + paxwarn(0, "User specified archive volume byte limit reached."); + if (ar_next() < 0) { + wrcnt = 0; + exit_val = 1; + return(-1); + } + wrcnt = 0; + + /* + * The new archive volume might have changed the size of the + * write blocksize. if so we figure out if we need to write + * (one or more times), or if there is now free space left in + * the buffer (it is no longer full). bufcnt has the number of + * bytes in the buffer, (the blocksize, at the point we were + * CALLED). Push has the amount of "extra" data in the buffer + * if the block size has shrunk from a volume change. + */ + bufend = buf + blksz; + if (blksz > bufcnt) + return(0); + if (blksz < bufcnt) + push = bufcnt - blksz; + } + + /* + * We have enough data to write at least one archive block + */ + for (;;) { + /* + * write a block and check if it all went out ok + */ + cnt = ar_write(buf, blksz); + if (cnt == blksz) { + /* + * the write went ok + */ + wrcnt += cnt; + totcnt += cnt; + if (push > 0) { + /* we have extra data to push to the front. + * check for more than 1 block of push, and if + * so we loop back to write again + */ + memcpy(buf, bufend, push); + bufpt = buf + push; + if (push >= blksz) { + push -= blksz; + continue; + } + } else + bufpt = buf; + return(totcnt); + } else if (cnt > 0) { + /* + * Oh drat we got a partial write! + * if format doesnt care about alignment let it go, + * we warned the user in ar_write().... but this means + * the last record on this volume violates pax spec.... + */ + totcnt += cnt; + wrcnt += cnt; + bufpt = buf + cnt; + cnt = bufcnt - cnt; + memcpy(buf, bufpt, cnt); + bufpt = buf + cnt; + if (!frmt->blkalgn || ((cnt % frmt->blkalgn) == 0)) + return(totcnt); + break; + } + + /* + * All done, go to next archive + */ + wrcnt = 0; + if (ar_next() < 0) + break; + + /* + * The new archive volume might also have changed the block + * size. if so, figure out if we have too much or too little + * data for using the new block size + */ + bufend = buf + blksz; + if (blksz > bufcnt) + return(0); + if (blksz < bufcnt) + push = bufcnt - blksz; + } + + /* + * write failed, stop pax. we must not create a bad archive! + */ + exit_val = 1; + return(-1); +} diff --git a/commands/pax/cache.c b/commands/pax/cache.c new file mode 100644 index 000000000..25dfd9ec5 --- /dev/null +++ b/commands/pax/cache.c @@ -0,0 +1,430 @@ +/*- + * Copyright (c) 1992 Keith Muller. + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Keith Muller of the University of California, San Diego. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef lint +#if 0 +static char sccsid[] = "@(#)cache.c 8.1 (Berkeley) 5/31/93"; +#endif +#endif /* not lint */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include "pax.h" +#include "cache.h" +#include "extern.h" + +/* + * routines that control user, group, uid and gid caches (for the archive + * member print routine). + * IMPORTANT: + * these routines cache BOTH hits and misses, a major performance improvement + */ + +static int pwopn = 0; /* is password file open */ +static int gropn = 0; /* is group file open */ +static UIDC **uidtb = NULL; /* uid to name cache */ +static GIDC **gidtb = NULL; /* gid to name cache */ +static UIDC **usrtb = NULL; /* user name to uid cache */ +static GIDC **grptb = NULL; /* group name to gid cache */ + +/* + * uidtb_start + * creates an an empty uidtb + * Return: + * 0 if ok, -1 otherwise + */ + +int +uidtb_start(void) +{ + static int fail = 0; + + if (uidtb != NULL) + return(0); + if (fail) + return(-1); + if ((uidtb = (UIDC **)calloc(UID_SZ, sizeof(UIDC *))) == NULL) { + ++fail; + paxwarn(1, "Unable to allocate memory for user id cache table"); + return(-1); + } + return(0); +} + +/* + * gidtb_start + * creates an an empty gidtb + * Return: + * 0 if ok, -1 otherwise + */ + +int +gidtb_start(void) +{ + static int fail = 0; + + if (gidtb != NULL) + return(0); + if (fail) + return(-1); + if ((gidtb = (GIDC **)calloc(GID_SZ, sizeof(GIDC *))) == NULL) { + ++fail; + paxwarn(1, "Unable to allocate memory for group id cache table"); + return(-1); + } + return(0); +} + +/* + * usrtb_start + * creates an an empty usrtb + * Return: + * 0 if ok, -1 otherwise + */ + +int +usrtb_start(void) +{ + static int fail = 0; + + if (usrtb != NULL) + return(0); + if (fail) + return(-1); + if ((usrtb = (UIDC **)calloc(UNM_SZ, sizeof(UIDC *))) == NULL) { + ++fail; + paxwarn(1, "Unable to allocate memory for user name cache table"); + return(-1); + } + return(0); +} + +/* + * grptb_start + * creates an an empty grptb + * Return: + * 0 if ok, -1 otherwise + */ + +int +grptb_start(void) +{ + static int fail = 0; + + if (grptb != NULL) + return(0); + if (fail) + return(-1); + if ((grptb = (GIDC **)calloc(GNM_SZ, sizeof(GIDC *))) == NULL) { + ++fail; + paxwarn(1,"Unable to allocate memory for group name cache table"); + return(-1); + } + return(0); +} + +/* + * name_uid() + * caches the name (if any) for the uid. If frc set, we always return the + * the stored name (if valid or invalid match). We use a simple hash table. + * Return + * Pointer to stored name (or an empty string). + */ + +const char * +name_uid(uid_t uid, int frc) +{ + struct passwd *pw; + UIDC *ptr; + + if ((uidtb == NULL) && (uidtb_start() < 0)) + return(""); + + /* + * see if we have this uid cached + */ + ptr = uidtb[uid % UID_SZ]; + if ((ptr != NULL) && (ptr->valid > 0) && (ptr->uid == uid)) { + /* + * have an entry for this uid + */ + if (frc || (ptr->valid == VALID)) + return(ptr->name); + return(""); + } + + /* + * No entry for this uid, we will add it + */ + if (!pwopn) { + setpassent(1); + ++pwopn; + } + if (ptr == NULL) + ptr = uidtb[uid % UID_SZ] = (UIDC *)malloc(sizeof(UIDC)); + + if ((pw = getpwuid(uid)) == NULL) { + /* + * no match for this uid in the local password file + * a string that is the uid in numeric format + */ + if (ptr == NULL) + return(""); + ptr->uid = uid; + ptr->valid = INVALID; +# ifdef NET2_STAT + (void)snprintf(ptr->name, sizeof(ptr->name), "%u", uid); +# else + (void)snprintf(ptr->name, sizeof(ptr->name), "%lu", + (unsigned long)uid); +# endif + if (frc == 0) + return(""); + } else { + /* + * there is an entry for this uid in the password file + */ + if (ptr == NULL) + return(pw->pw_name); + ptr->uid = uid; + (void)strncpy(ptr->name, pw->pw_name, UNMLEN - 1); + ptr->name[UNMLEN-1] = '\0'; + ptr->valid = VALID; + } + return(ptr->name); +} + +/* + * name_gid() + * caches the name (if any) for the gid. If frc set, we always return the + * the stored name (if valid or invalid match). We use a simple hash table. + * Return + * Pointer to stored name (or an empty string). + */ + +const char * +name_gid(gid_t gid, int frc) +{ + struct group *gr; + GIDC *ptr; + + if ((gidtb == NULL) && (gidtb_start() < 0)) + return(""); + + /* + * see if we have this gid cached + */ + ptr = gidtb[gid % GID_SZ]; + if ((ptr != NULL) && (ptr->valid > 0) && (ptr->gid == gid)) { + /* + * have an entry for this gid + */ + if (frc || (ptr->valid == VALID)) + return(ptr->name); + return(""); + } + + /* + * No entry for this gid, we will add it + */ + if (!gropn) { + setgroupent(1); + ++gropn; + } + if (ptr == NULL) + ptr = gidtb[gid % GID_SZ] = (GIDC *)malloc(sizeof(GIDC)); + + if ((gr = getgrgid(gid)) == NULL) { + /* + * no match for this gid in the local group file, put in + * a string that is the gid in numeric format + */ + if (ptr == NULL) + return(""); + ptr->gid = gid; + ptr->valid = INVALID; +# ifdef NET2_STAT + (void)snprintf(ptr->name, sizeof(ptr->name), "%u", gid); +# else + (void)snprintf(ptr->name, sizeof(ptr->name), "%lu", + (unsigned long)gid); +# endif + if (frc == 0) + return(""); + } else { + /* + * there is an entry for this group in the group file + */ + if (ptr == NULL) + return(gr->gr_name); + ptr->gid = gid; + (void)strncpy(ptr->name, gr->gr_name, GNMLEN - 1); + ptr->name[GNMLEN-1] = '\0'; + ptr->valid = VALID; + } + return(ptr->name); +} + +/* + * uid_name() + * caches the uid for a given user name. We use a simple hash table. + * Return + * the uid (if any) for a user name, or a -1 if no match can be found + */ + +int +uid_name(char *name, uid_t *uid) +{ + struct passwd *pw; + UIDC *ptr; + int namelen; + + /* + * return -1 for mangled names + */ + if (((namelen = strlen(name)) == 0) || (name[0] == '\0')) + return(-1); + if ((usrtb == NULL) && (usrtb_start() < 0)) + return(-1); + + /* + * look up in hash table, if found and valid return the uid, + * if found and invalid, return a -1 + */ + ptr = usrtb[st_hash(name, namelen, UNM_SZ)]; + if ((ptr != NULL) && (ptr->valid > 0) && !strcmp(name, ptr->name)) { + if (ptr->valid == INVALID) + return(-1); + *uid = ptr->uid; + return(0); + } + + if (!pwopn) { + setpassent(1); + ++pwopn; + } + + if (ptr == NULL) + ptr = usrtb[st_hash(name, namelen, UNM_SZ)] = + (UIDC *)malloc(sizeof(UIDC)); + + /* + * no match, look it up, if no match store it as an invalid entry, + * or store the matching uid + */ + if (ptr == NULL) { + if ((pw = getpwnam(name)) == NULL) + return(-1); + *uid = pw->pw_uid; + return(0); + } + (void)strncpy(ptr->name, name, UNMLEN - 1); + ptr->name[UNMLEN-1] = '\0'; + if ((pw = getpwnam(name)) == NULL) { + ptr->valid = INVALID; + return(-1); + } + ptr->valid = VALID; + *uid = ptr->uid = pw->pw_uid; + return(0); +} + +/* + * gid_name() + * caches the gid for a given group name. We use a simple hash table. + * Return + * the gid (if any) for a group name, or a -1 if no match can be found + */ + +int +gid_name(char *name, gid_t *gid) +{ + struct group *gr; + GIDC *ptr; + int namelen; + + /* + * return -1 for mangled names + */ + if (((namelen = strlen(name)) == 0) || (name[0] == '\0')) + return(-1); + if ((grptb == NULL) && (grptb_start() < 0)) + return(-1); + + /* + * look up in hash table, if found and valid return the uid, + * if found and invalid, return a -1 + */ + ptr = grptb[st_hash(name, namelen, GID_SZ)]; + if ((ptr != NULL) && (ptr->valid > 0) && !strcmp(name, ptr->name)) { + if (ptr->valid == INVALID) + return(-1); + *gid = ptr->gid; + return(0); + } + + if (!gropn) { + setgroupent(1); + ++gropn; + } + if (ptr == NULL) + ptr = grptb[st_hash(name, namelen, GID_SZ)] = + (GIDC *)malloc(sizeof(GIDC)); + + /* + * no match, look it up, if no match store it as an invalid entry, + * or store the matching gid + */ + if (ptr == NULL) { + if ((gr = getgrnam(name)) == NULL) + return(-1); + *gid = gr->gr_gid; + return(0); + } + + (void)strncpy(ptr->name, name, GNMLEN - 1); + ptr->name[GNMLEN-1] = '\0'; + if ((gr = getgrnam(name)) == NULL) { + ptr->valid = INVALID; + return(-1); + } + ptr->valid = VALID; + *gid = ptr->gid = gr->gr_gid; + return(0); +} diff --git a/commands/pax/cache.h b/commands/pax/cache.h new file mode 100644 index 000000000..9d0ddcba0 --- /dev/null +++ b/commands/pax/cache.h @@ -0,0 +1,71 @@ +/*- + * Copyright (c) 1992 Keith Muller. + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Keith Muller of the University of California, San Diego. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)cache.h 8.1 (Berkeley) 5/31/93 + * $FreeBSD: src/bin/pax/cache.h,v 1.9 2004/04/06 20:06:48 markm Exp $ + */ + +/* + * Constants and data structures used to implement group and password file + * caches. Traditional passwd/group cache routines perform quite poorly with + * archives. The chances of hitting a valid lookup with an archive is quite a + * bit worse than with files already resident on the file system. These misses + * create a MAJOR performance cost. To address this problem, these routines + * cache both hits and misses. + * + * NOTE: name lengths must be as large as those stored in ANY PROTOCOL and + * as stored in the passwd and group files. CACHE SIZES MUST BE PRIME + */ +#define UNMLEN 32 /* >= user name found in any protocol */ +#define GNMLEN 32 /* >= group name found in any protocol */ +#define UID_SZ 317 /* size of user_name/uid cache */ +#define UNM_SZ 317 /* size of user_name/uid cache */ +#define GID_SZ 251 /* size of gid cache */ +#define GNM_SZ 317 /* size of group name cache */ +#define VALID 1 /* entry and name are valid */ +#define INVALID 2 /* entry valid, name NOT valid */ + +/* + * Node structures used in the user, group, uid, and gid caches. + */ + +typedef struct uidc { + int valid; /* is this a valid or a miss entry */ + char name[UNMLEN]; /* uid name */ + uid_t uid; /* cached uid */ +} UIDC; + +typedef struct gidc { + int valid; /* is this a valid or a miss entry */ + char name[GNMLEN]; /* gid name */ + gid_t gid; /* cached gid */ +} GIDC; diff --git a/commands/pax/cpio.c b/commands/pax/cpio.c new file mode 100644 index 000000000..0a86a1f9f --- /dev/null +++ b/commands/pax/cpio.c @@ -0,0 +1,1152 @@ +/*- + * Copyright (c) 1992 Keith Muller. + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Keith Muller of the University of California, San Diego. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef lint +#if 0 +static char sccsid[] = "@(#)cpio.c 8.1 (Berkeley) 5/31/93"; +#endif +#endif /* not lint */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include "pax.h" +#include "cpio.h" +#include "extern.h" + +static int rd_nm(ARCHD *, int); +static int rd_ln_nm(ARCHD *); +static int com_rd(ARCHD *); + +/* + * Routines which support the different cpio versions + */ + +static int swp_head; /* binary cpio header byte swap */ + +/* + * Routines common to all versions of cpio + */ + +/* + * cpio_strd() + * Fire up the hard link detection code + * Return: + * 0 if ok -1 otherwise (the return values of lnk_start()) + */ + +int +cpio_strd(void) +{ + return(lnk_start()); +} + +/* + * cpio_trail() + * Called to determine if a header block is a valid trailer. We are + * passed the block, the in_sync flag (which tells us we are in resync + * mode; looking for a valid header), and cnt (which starts at zero) + * which is used to count the number of empty blocks we have seen so far. + * Return: + * 0 if a valid trailer, -1 if not a valid trailer, + */ + +int +cpio_trail(ARCHD *arcn) +{ + /* + * look for trailer id in file we are about to process + */ + if ((strcmp(arcn->name, TRAILER) == 0) && (arcn->sb.st_size == 0)) + return(0); + return(-1); +} + +/* + * com_rd() + * operations common to all cpio read functions. + * Return: + * 0 + */ + +static int +com_rd(ARCHD *arcn) +{ + arcn->skip = 0; + arcn->pat = NULL; + arcn->org_name = arcn->name; + switch(arcn->sb.st_mode & C_IFMT) { + case C_ISFIFO: + arcn->type = PAX_FIF; + break; + case C_ISDIR: + arcn->type = PAX_DIR; + break; + case C_ISBLK: + arcn->type = PAX_BLK; + break; + case C_ISCHR: + arcn->type = PAX_CHR; + break; + case C_ISLNK: + arcn->type = PAX_SLK; + break; + case C_ISOCK: + arcn->type = PAX_SCK; + break; + case C_ISCTG: + case C_ISREG: + default: + /* + * we have file data, set up skip (pad is set in the format + * specific sections) + */ + arcn->sb.st_mode = (arcn->sb.st_mode & 0xfff) | C_ISREG; + arcn->type = PAX_REG; + arcn->skip = arcn->sb.st_size; + break; + } + if (chk_lnk(arcn) < 0) + return(-1); + return(0); +} + +/* + * cpio_end_wr() + * write the special file with the name trailer in the proper format + * Return: + * result of the write of the trailer from the cpio specific write func + */ + +int +cpio_endwr(void) +{ + ARCHD last; + + /* + * create a trailer request and call the proper format write function + */ + memset(&last, 0, sizeof(last)); + last.nlen = sizeof(TRAILER) - 1; + last.type = PAX_REG; + last.sb.st_nlink = 1; + (void)strcpy(last.name, TRAILER); + return((*frmt->wr)(&last)); +} + +/* + * rd_nam() + * read in the file name which follows the cpio header + * Return: + * 0 if ok, -1 otherwise + */ + +static int +rd_nm(ARCHD *arcn, int nsz) +{ + /* + * do not even try bogus values + */ + if ((nsz == 0) || (nsz > (int)sizeof(arcn->name))) { + paxwarn(1, "Cpio file name length %d is out of range", nsz); + return(-1); + } + + /* + * read the name and make sure it is not empty and is \0 terminated + */ + if ((rd_wrbuf(arcn->name,nsz) != nsz) || (arcn->name[nsz-1] != '\0') || + (arcn->name[0] == '\0')) { + paxwarn(1, "Cpio file name in header is corrupted"); + return(-1); + } + return(0); +} + +/* + * rd_ln_nm() + * read in the link name for a file with links. The link name is stored + * like file data (and is NOT \0 terminated!) + * Return: + * 0 if ok, -1 otherwise + */ + +static int +rd_ln_nm(ARCHD *arcn) +{ + /* + * check the length specified for bogus values + */ + if ((arcn->sb.st_size == 0) || + ((size_t)arcn->sb.st_size >= sizeof(arcn->ln_name))) { +# ifdef NET2_STAT + paxwarn(1, "Cpio link name length is invalid: %lu", + arcn->sb.st_size); +# else + paxwarn(1, "Cpio link name length is invalid: %ju", + (uintmax_t)arcn->sb.st_size); +# endif + return(-1); + } + + /* + * read in the link name and \0 terminate it + */ + if (rd_wrbuf(arcn->ln_name, (int)arcn->sb.st_size) != + (int)arcn->sb.st_size) { + paxwarn(1, "Cpio link name read error"); + return(-1); + } + arcn->ln_nlen = arcn->sb.st_size; + arcn->ln_name[arcn->ln_nlen] = '\0'; + + /* + * watch out for those empty link names + */ + if (arcn->ln_name[0] == '\0') { + paxwarn(1, "Cpio link name is corrupt"); + return(-1); + } + return(0); +} + +/* + * Routines common to the extended byte oriented cpio format + */ + +/* + * cpio_id() + * determine if a block given to us is a valid extended byte oriented + * cpio header + * Return: + * 0 if a valid header, -1 otherwise + */ + +int +cpio_id(char *blk, int size) +{ + if ((size < (int)sizeof(HD_CPIO)) || + (strncmp(blk, AMAGIC, sizeof(AMAGIC) - 1) != 0)) + return(-1); + return(0); +} + +/* + * cpio_rd() + * determine if a buffer is a byte oriented extended cpio archive entry. + * convert and store the values in the ARCHD parameter. + * Return: + * 0 if a valid header, -1 otherwise. + */ + +int +cpio_rd(ARCHD *arcn, char *buf) +{ + int nsz; + HD_CPIO *hd; + + /* + * check that this is a valid header, if not return -1 + */ + if (cpio_id(buf, sizeof(HD_CPIO)) < 0) + return(-1); + hd = (HD_CPIO *)buf; + + /* + * byte oriented cpio (posix) does not have padding! extract the octal + * ascii fields from the header + */ + arcn->pad = 0L; + arcn->sb.st_dev = (dev_t)asc_ul(hd->c_dev, sizeof(hd->c_dev), OCT); + arcn->sb.st_ino = (ino_t)asc_ul(hd->c_ino, sizeof(hd->c_ino), OCT); + arcn->sb.st_mode = (mode_t)asc_ul(hd->c_mode, sizeof(hd->c_mode), OCT); + arcn->sb.st_uid = (uid_t)asc_ul(hd->c_uid, sizeof(hd->c_uid), OCT); + arcn->sb.st_gid = (gid_t)asc_ul(hd->c_gid, sizeof(hd->c_gid), OCT); + arcn->sb.st_nlink = (nlink_t)asc_ul(hd->c_nlink, sizeof(hd->c_nlink), + OCT); + arcn->sb.st_rdev = (dev_t)asc_ul(hd->c_rdev, sizeof(hd->c_rdev), OCT); +#ifdef NET2_STAT + arcn->sb.st_mtime = (time_t)asc_ul(hd->c_mtime, sizeof(hd->c_mtime), + OCT); +#else + arcn->sb.st_mtime = (time_t)asc_uqd(hd->c_mtime, sizeof(hd->c_mtime), + OCT); +#endif + arcn->sb.st_ctime = arcn->sb.st_atime = arcn->sb.st_mtime; +#ifdef NET2_STAT + arcn->sb.st_size = (off_t)asc_ul(hd->c_filesize,sizeof(hd->c_filesize), + OCT); +#else + arcn->sb.st_size = (off_t)asc_uqd(hd->c_filesize,sizeof(hd->c_filesize), + OCT); +#endif + + /* + * check name size and if valid, read in the name of this entry (name + * follows header in the archive) + */ + if ((nsz = (int)asc_ul(hd->c_namesize,sizeof(hd->c_namesize),OCT)) < 2) + return(-1); + arcn->nlen = nsz - 1; + if (rd_nm(arcn, nsz) < 0) + return(-1); + + if (((arcn->sb.st_mode&C_IFMT) != C_ISLNK)||(arcn->sb.st_size == 0)) { + /* + * no link name to read for this file + */ + arcn->ln_nlen = 0; + arcn->ln_name[0] = '\0'; + return(com_rd(arcn)); + } + + /* + * check link name size and read in the link name. Link names are + * stored like file data. + */ + if (rd_ln_nm(arcn) < 0) + return(-1); + + /* + * we have a valid header (with a link) + */ + return(com_rd(arcn)); +} + +/* + * cpio_endrd() + * no cleanup needed here, just return size of the trailer (for append) + * Return: + * size of trailer header in this format + */ + +off_t +cpio_endrd(void) +{ + return((off_t)(sizeof(HD_CPIO) + sizeof(TRAILER))); +} + +/* + * cpio_stwr() + * start up the device mapping table + * Return: + * 0 if ok, -1 otherwise (what dev_start() returns) + */ + +int +cpio_stwr(void) +{ + return(dev_start()); +} + +/* + * cpio_wr() + * copy the data in the ARCHD to buffer in extended byte oriented cpio + * format. + * Return + * 0 if file has data to be written after the header, 1 if file has NO + * data to write after the header, -1 if archive write failed + */ + +int +cpio_wr(ARCHD *arcn) +{ + HD_CPIO *hd; + int nsz; + char hdblk[sizeof(HD_CPIO)]; + + /* + * check and repair truncated device and inode fields in the header + */ + if (map_dev(arcn, (u_long)CPIO_MASK, (u_long)CPIO_MASK) < 0) + return(-1); + + arcn->pad = 0L; + nsz = arcn->nlen + 1; + hd = (HD_CPIO *)hdblk; + if ((arcn->type != PAX_BLK) && (arcn->type != PAX_CHR)) + arcn->sb.st_rdev = 0; + + switch(arcn->type) { + case PAX_CTG: + case PAX_REG: + case PAX_HRG: + /* + * set data size for file data + */ +# ifdef NET2_STAT + if (ul_asc((u_long)arcn->sb.st_size, hd->c_filesize, + sizeof(hd->c_filesize), OCT)) { +# else + if (uqd_asc((u_quad_t)arcn->sb.st_size, hd->c_filesize, + sizeof(hd->c_filesize), OCT)) { +# endif + paxwarn(1,"File is too large for cpio format %s", + arcn->org_name); + return(1); + } + break; + case PAX_SLK: + /* + * set data size to hold link name + */ + if (ul_asc((u_long)arcn->ln_nlen, hd->c_filesize, + sizeof(hd->c_filesize), OCT)) + goto out; + break; + default: + /* + * all other file types have no file data + */ + if (ul_asc((u_long)0, hd->c_filesize, sizeof(hd->c_filesize), + OCT)) + goto out; + break; + } + + /* + * copy the values to the header using octal ascii + */ + if (ul_asc((u_long)MAGIC, hd->c_magic, sizeof(hd->c_magic), OCT) || + ul_asc((u_long)arcn->sb.st_dev, hd->c_dev, sizeof(hd->c_dev), + OCT) || + ul_asc((u_long)arcn->sb.st_ino, hd->c_ino, sizeof(hd->c_ino), + OCT) || + ul_asc((u_long)arcn->sb.st_mode, hd->c_mode, sizeof(hd->c_mode), + OCT) || + ul_asc((u_long)arcn->sb.st_uid, hd->c_uid, sizeof(hd->c_uid), + OCT) || + ul_asc((u_long)arcn->sb.st_gid, hd->c_gid, sizeof(hd->c_gid), + OCT) || + ul_asc((u_long)arcn->sb.st_nlink, hd->c_nlink, sizeof(hd->c_nlink), + OCT) || + ul_asc((u_long)arcn->sb.st_rdev, hd->c_rdev, sizeof(hd->c_rdev), + OCT) || + ul_asc((u_long)arcn->sb.st_mtime,hd->c_mtime,sizeof(hd->c_mtime), + OCT) || + ul_asc((u_long)nsz, hd->c_namesize, sizeof(hd->c_namesize), OCT)) + goto out; + + /* + * write the file name to the archive + */ + if ((wr_rdbuf(hdblk, (int)sizeof(HD_CPIO)) < 0) || + (wr_rdbuf(arcn->name, nsz) < 0)) { + paxwarn(1, "Unable to write cpio header for %s", arcn->org_name); + return(-1); + } + + /* + * if this file has data, we are done. The caller will write the file + * data, if we are link tell caller we are done, go to next file + */ + if ((arcn->type == PAX_CTG) || (arcn->type == PAX_REG) || + (arcn->type == PAX_HRG)) + return(0); + if (arcn->type != PAX_SLK) + return(1); + + /* + * write the link name to the archive, tell the caller to go to the + * next file as we are done. + */ + if (wr_rdbuf(arcn->ln_name, arcn->ln_nlen) < 0) { + paxwarn(1,"Unable to write cpio link name for %s",arcn->org_name); + return(-1); + } + return(1); + + out: + /* + * header field is out of range + */ + paxwarn(1, "Cpio header field is too small to store file %s", + arcn->org_name); + return(1); +} + +/* + * Routines common to the system VR4 version of cpio (with/without file CRC) + */ + +/* + * vcpio_id() + * determine if a block given to us is a valid system VR4 cpio header + * WITHOUT crc. WATCH it the magic cookies are in OCTAL, the header + * uses HEX + * Return: + * 0 if a valid header, -1 otherwise + */ + +int +vcpio_id(char *blk, int size) +{ + if ((size < (int)sizeof(HD_VCPIO)) || + (strncmp(blk, AVMAGIC, sizeof(AVMAGIC) - 1) != 0)) + return(-1); + return(0); +} + +/* + * crc_id() + * determine if a block given to us is a valid system VR4 cpio header + * WITH crc. WATCH it the magic cookies are in OCTAL the header uses HEX + * Return: + * 0 if a valid header, -1 otherwise + */ + +int +crc_id(char *blk, int size) +{ + if ((size < (int)sizeof(HD_VCPIO)) || + (strncmp(blk, AVCMAGIC, (int)sizeof(AVCMAGIC) - 1) != 0)) + return(-1); + return(0); +} + +/* + * crc_strd() + w set file data CRC calculations. Fire up the hard link detection code + * Return: + * 0 if ok -1 otherwise (the return values of lnk_start()) + */ + +int +crc_strd(void) +{ + docrc = 1; + return(lnk_start()); +} + +/* + * vcpio_rd() + * determine if a buffer is a system VR4 archive entry. (with/without CRC) + * convert and store the values in the ARCHD parameter. + * Return: + * 0 if a valid header, -1 otherwise. + */ + +int +vcpio_rd(ARCHD *arcn, char *buf) +{ + HD_VCPIO *hd; + dev_t devminor; + dev_t devmajor; + int nsz; + + /* + * during the id phase it was determined if we were using CRC, use the + * proper id routine. + */ + if (docrc) { + if (crc_id(buf, sizeof(HD_VCPIO)) < 0) + return(-1); + } else { + if (vcpio_id(buf, sizeof(HD_VCPIO)) < 0) + return(-1); + } + + hd = (HD_VCPIO *)buf; + arcn->pad = 0L; + + /* + * extract the hex ascii fields from the header + */ + arcn->sb.st_ino = (ino_t)asc_ul(hd->c_ino, sizeof(hd->c_ino), HEX); + arcn->sb.st_mode = (mode_t)asc_ul(hd->c_mode, sizeof(hd->c_mode), HEX); + arcn->sb.st_uid = (uid_t)asc_ul(hd->c_uid, sizeof(hd->c_uid), HEX); + arcn->sb.st_gid = (gid_t)asc_ul(hd->c_gid, sizeof(hd->c_gid), HEX); +#ifdef NET2_STAT + arcn->sb.st_mtime = (time_t)asc_ul(hd->c_mtime,sizeof(hd->c_mtime),HEX); +#else + arcn->sb.st_mtime = (time_t)asc_uqd(hd->c_mtime,sizeof(hd->c_mtime),HEX); +#endif + arcn->sb.st_ctime = arcn->sb.st_atime = arcn->sb.st_mtime; +#ifdef NET2_STAT + arcn->sb.st_size = (off_t)asc_ul(hd->c_filesize, + sizeof(hd->c_filesize), HEX); +#else + arcn->sb.st_size = (off_t)asc_uqd(hd->c_filesize, + sizeof(hd->c_filesize), HEX); +#endif + arcn->sb.st_nlink = (nlink_t)asc_ul(hd->c_nlink, sizeof(hd->c_nlink), + HEX); + devmajor = (dev_t)asc_ul(hd->c_maj, sizeof(hd->c_maj), HEX); + devminor = (dev_t)asc_ul(hd->c_min, sizeof(hd->c_min), HEX); + arcn->sb.st_dev = TODEV(devmajor, devminor); + devmajor = (dev_t)asc_ul(hd->c_rmaj, sizeof(hd->c_maj), HEX); + devminor = (dev_t)asc_ul(hd->c_rmin, sizeof(hd->c_min), HEX); + arcn->sb.st_rdev = TODEV(devmajor, devminor); + arcn->crc = asc_ul(hd->c_chksum, sizeof(hd->c_chksum), HEX); + + /* + * check the length of the file name, if ok read it in, return -1 if + * bogus + */ + if ((nsz = (int)asc_ul(hd->c_namesize,sizeof(hd->c_namesize),HEX)) < 2) + return(-1); + arcn->nlen = nsz - 1; + if (rd_nm(arcn, nsz) < 0) + return(-1); + + /* + * skip padding. header + filename is aligned to 4 byte boundries + */ + if (rd_skip((off_t)(VCPIO_PAD(sizeof(HD_VCPIO) + nsz))) < 0) + return(-1); + + /* + * if not a link (or a file with no data), calculate pad size (for + * padding which follows the file data), clear the link name and return + */ + if (((arcn->sb.st_mode&C_IFMT) != C_ISLNK)||(arcn->sb.st_size == 0)) { + /* + * we have a valid header (not a link) + */ + arcn->ln_nlen = 0; + arcn->ln_name[0] = '\0'; + arcn->pad = VCPIO_PAD(arcn->sb.st_size); + return(com_rd(arcn)); + } + + /* + * read in the link name and skip over the padding + */ + if ((rd_ln_nm(arcn) < 0) || + (rd_skip((off_t)(VCPIO_PAD(arcn->sb.st_size))) < 0)) + return(-1); + + /* + * we have a valid header (with a link) + */ + return(com_rd(arcn)); +} + +/* + * vcpio_endrd() + * no cleanup needed here, just return size of the trailer (for append) + * Return: + * size of trailer header in this format + */ + +off_t +vcpio_endrd(void) +{ + return((off_t)(sizeof(HD_VCPIO) + sizeof(TRAILER) + + (VCPIO_PAD(sizeof(HD_VCPIO) + sizeof(TRAILER))))); +} + +/* + * crc_stwr() + * start up the device mapping table, enable crc file calculation + * Return: + * 0 if ok, -1 otherwise (what dev_start() returns) + */ + +int +crc_stwr(void) +{ + docrc = 1; + return(dev_start()); +} + +/* + * vcpio_wr() + * copy the data in the ARCHD to buffer in system VR4 cpio + * (with/without crc) format. + * Return + * 0 if file has data to be written after the header, 1 if file has + * NO data to write after the header, -1 if archive write failed + */ + +int +vcpio_wr(ARCHD *arcn) +{ + HD_VCPIO *hd; + unsigned int nsz; + char hdblk[sizeof(HD_VCPIO)]; + + /* + * check and repair truncated device and inode fields in the cpio + * header + */ + if (map_dev(arcn, (u_long)VCPIO_MASK, (u_long)VCPIO_MASK) < 0) + return(-1); + nsz = arcn->nlen + 1; + hd = (HD_VCPIO *)hdblk; + if ((arcn->type != PAX_BLK) && (arcn->type != PAX_CHR)) + arcn->sb.st_rdev = 0; + + /* + * add the proper magic value depending whether we were asked for + * file data crc's, and the crc if needed. + */ + if (docrc) { + if (ul_asc((u_long)VCMAGIC, hd->c_magic, sizeof(hd->c_magic), + OCT) || + ul_asc((u_long)arcn->crc,hd->c_chksum,sizeof(hd->c_chksum), + HEX)) + goto out; + } else { + if (ul_asc((u_long)VMAGIC, hd->c_magic, sizeof(hd->c_magic), + OCT) || + ul_asc((u_long)0L, hd->c_chksum, sizeof(hd->c_chksum),HEX)) + goto out; + } + + switch(arcn->type) { + case PAX_CTG: + case PAX_REG: + case PAX_HRG: + /* + * caller will copy file data to the archive. tell him how + * much to pad. + */ + arcn->pad = VCPIO_PAD(arcn->sb.st_size); +# ifdef NET2_STAT + if (ul_asc((u_long)arcn->sb.st_size, hd->c_filesize, + sizeof(hd->c_filesize), HEX)) { +# else + if (uqd_asc((u_quad_t)arcn->sb.st_size, hd->c_filesize, + sizeof(hd->c_filesize), HEX)) { +# endif + paxwarn(1,"File is too large for sv4cpio format %s", + arcn->org_name); + return(1); + } + break; + case PAX_SLK: + /* + * no file data for the caller to process, the file data has + * the size of the link + */ + arcn->pad = 0L; + if (ul_asc((u_long)arcn->ln_nlen, hd->c_filesize, + sizeof(hd->c_filesize), HEX)) + goto out; + break; + default: + /* + * no file data for the caller to process + */ + arcn->pad = 0L; + if (ul_asc((u_long)0L, hd->c_filesize, sizeof(hd->c_filesize), + HEX)) + goto out; + break; + } + + /* + * set the other fields in the header + */ + if (ul_asc((u_long)arcn->sb.st_ino, hd->c_ino, sizeof(hd->c_ino), + HEX) || + ul_asc((u_long)arcn->sb.st_mode, hd->c_mode, sizeof(hd->c_mode), + HEX) || + ul_asc((u_long)arcn->sb.st_uid, hd->c_uid, sizeof(hd->c_uid), + HEX) || + ul_asc((u_long)arcn->sb.st_gid, hd->c_gid, sizeof(hd->c_gid), + HEX) || + ul_asc((u_long)arcn->sb.st_mtime, hd->c_mtime, sizeof(hd->c_mtime), + HEX) || + ul_asc((u_long)arcn->sb.st_nlink, hd->c_nlink, sizeof(hd->c_nlink), + HEX) || + ul_asc((u_long)major(arcn->sb.st_dev),hd->c_maj, sizeof(hd->c_maj), + HEX) || + ul_asc((u_long)minor(arcn->sb.st_dev),hd->c_min, sizeof(hd->c_min), + HEX) || + ul_asc((u_long)major(arcn->sb.st_rdev),hd->c_rmaj,sizeof(hd->c_maj), + HEX) || + ul_asc((u_long)minor(arcn->sb.st_rdev),hd->c_rmin,sizeof(hd->c_min), + HEX) || + ul_asc((u_long)nsz, hd->c_namesize, sizeof(hd->c_namesize), HEX)) + goto out; + + /* + * write the header, the file name and padding as required. + */ + if ((wr_rdbuf(hdblk, (int)sizeof(HD_VCPIO)) < 0) || + (wr_rdbuf(arcn->name, (int)nsz) < 0) || + (wr_skip((off_t)(VCPIO_PAD(sizeof(HD_VCPIO) + nsz))) < 0)) { + paxwarn(1,"Could not write sv4cpio header for %s",arcn->org_name); + return(-1); + } + + /* + * if we have file data, tell the caller we are done, copy the file + */ + if ((arcn->type == PAX_CTG) || (arcn->type == PAX_REG) || + (arcn->type == PAX_HRG)) + return(0); + + /* + * if we are not a link, tell the caller we are done, go to next file + */ + if (arcn->type != PAX_SLK) + return(1); + + /* + * write the link name, tell the caller we are done. + */ + if ((wr_rdbuf(arcn->ln_name, arcn->ln_nlen) < 0) || + (wr_skip((off_t)(VCPIO_PAD(arcn->ln_nlen))) < 0)) { + paxwarn(1,"Could not write sv4cpio link name for %s", + arcn->org_name); + return(-1); + } + return(1); + + out: + /* + * header field is out of range + */ + paxwarn(1,"Sv4cpio header field is too small for file %s",arcn->org_name); + return(1); +} + +/* + * Routines common to the old binary header cpio + */ + +/* + * bcpio_id() + * determine if a block given to us is an old binary cpio header + * (with/without header byte swapping) + * Return: + * 0 if a valid header, -1 otherwise + */ + +int +bcpio_id(char *blk, int size) +{ + if (size < (int)sizeof(HD_BCPIO)) + return(-1); + + /* + * check both normal and byte swapped magic cookies + */ + if (((u_short)SHRT_EXT(blk)) == MAGIC) + return(0); + if (((u_short)RSHRT_EXT(blk)) == MAGIC) { + if (!swp_head) + ++swp_head; + return(0); + } + return(-1); +} + +/* + * bcpio_rd() + * determine if a buffer is an old binary archive entry. (It may have byte + * swapped header) convert and store the values in the ARCHD parameter. + * This is a very old header format and should not really be used. + * Return: + * 0 if a valid header, -1 otherwise. + */ + +int +bcpio_rd(ARCHD *arcn, char *buf) +{ + HD_BCPIO *hd; + int nsz; + + /* + * check the header + */ + if (bcpio_id(buf, sizeof(HD_BCPIO)) < 0) + return(-1); + + arcn->pad = 0L; + hd = (HD_BCPIO *)buf; + if (swp_head) { + /* + * header has swapped bytes on 16 bit boundaries + */ + arcn->sb.st_dev = (dev_t)(RSHRT_EXT(hd->h_dev)); + arcn->sb.st_ino = (ino_t)(RSHRT_EXT(hd->h_ino)); + arcn->sb.st_mode = (mode_t)(RSHRT_EXT(hd->h_mode)); + arcn->sb.st_uid = (uid_t)(RSHRT_EXT(hd->h_uid)); + arcn->sb.st_gid = (gid_t)(RSHRT_EXT(hd->h_gid)); + arcn->sb.st_nlink = (nlink_t)(RSHRT_EXT(hd->h_nlink)); + arcn->sb.st_rdev = (dev_t)(RSHRT_EXT(hd->h_rdev)); + arcn->sb.st_mtime = (time_t)(RSHRT_EXT(hd->h_mtime_1)); + arcn->sb.st_mtime = (arcn->sb.st_mtime << 16) | + ((time_t)(RSHRT_EXT(hd->h_mtime_2))); + arcn->sb.st_size = (off_t)(RSHRT_EXT(hd->h_filesize_1)); + arcn->sb.st_size = (arcn->sb.st_size << 16) | + ((off_t)(RSHRT_EXT(hd->h_filesize_2))); + nsz = (int)(RSHRT_EXT(hd->h_namesize)); + } else { + arcn->sb.st_dev = (dev_t)(SHRT_EXT(hd->h_dev)); + arcn->sb.st_ino = (ino_t)(SHRT_EXT(hd->h_ino)); + arcn->sb.st_mode = (mode_t)(SHRT_EXT(hd->h_mode)); + arcn->sb.st_uid = (uid_t)(SHRT_EXT(hd->h_uid)); + arcn->sb.st_gid = (gid_t)(SHRT_EXT(hd->h_gid)); + arcn->sb.st_nlink = (nlink_t)(SHRT_EXT(hd->h_nlink)); + arcn->sb.st_rdev = (dev_t)(SHRT_EXT(hd->h_rdev)); + arcn->sb.st_mtime = (time_t)(SHRT_EXT(hd->h_mtime_1)); + arcn->sb.st_mtime = (arcn->sb.st_mtime << 16) | + ((time_t)(SHRT_EXT(hd->h_mtime_2))); + arcn->sb.st_size = (off_t)(SHRT_EXT(hd->h_filesize_1)); + arcn->sb.st_size = (arcn->sb.st_size << 16) | + ((off_t)(SHRT_EXT(hd->h_filesize_2))); + nsz = (int)(SHRT_EXT(hd->h_namesize)); + } + arcn->sb.st_ctime = arcn->sb.st_atime = arcn->sb.st_mtime; + + /* + * check the file name size, if bogus give up. otherwise read the file + * name + */ + if (nsz < 2) + return(-1); + arcn->nlen = nsz - 1; + if (rd_nm(arcn, nsz) < 0) + return(-1); + + /* + * header + file name are aligned to 2 byte boundries, skip if needed + */ + if (rd_skip((off_t)(BCPIO_PAD(sizeof(HD_BCPIO) + nsz))) < 0) + return(-1); + + /* + * if not a link (or a file with no data), calculate pad size (for + * padding which follows the file data), clear the link name and return + */ + if (((arcn->sb.st_mode & C_IFMT) != C_ISLNK)||(arcn->sb.st_size == 0)){ + /* + * we have a valid header (not a link) + */ + arcn->ln_nlen = 0; + arcn->ln_name[0] = '\0'; + arcn->pad = BCPIO_PAD(arcn->sb.st_size); + return(com_rd(arcn)); + } + + if ((rd_ln_nm(arcn) < 0) || + (rd_skip((off_t)(BCPIO_PAD(arcn->sb.st_size))) < 0)) + return(-1); + + /* + * we have a valid header (with a link) + */ + return(com_rd(arcn)); +} + +/* + * bcpio_endrd() + * no cleanup needed here, just return size of the trailer (for append) + * Return: + * size of trailer header in this format + */ + +off_t +bcpio_endrd(void) +{ + return((off_t)(sizeof(HD_BCPIO) + sizeof(TRAILER) + + (BCPIO_PAD(sizeof(HD_BCPIO) + sizeof(TRAILER))))); +} + +/* + * bcpio_wr() + * copy the data in the ARCHD to buffer in old binary cpio format + * There is a real chance of field overflow with this critter. So we + * always check the conversion is ok. nobody in his their right mind + * should write an achive in this format... + * Return + * 0 if file has data to be written after the header, 1 if file has NO + * data to write after the header, -1 if archive write failed + */ + +int +bcpio_wr(ARCHD *arcn) +{ + HD_BCPIO *hd; + int nsz; + char hdblk[sizeof(HD_BCPIO)]; + off_t t_offt; + int t_int; + time_t t_timet; + + /* + * check and repair truncated device and inode fields in the cpio + * header + */ + if (map_dev(arcn, (u_long)BCPIO_MASK, (u_long)BCPIO_MASK) < 0) + return(-1); + + if ((arcn->type != PAX_BLK) && (arcn->type != PAX_CHR)) + arcn->sb.st_rdev = 0; + hd = (HD_BCPIO *)hdblk; + + switch(arcn->type) { + case PAX_CTG: + case PAX_REG: + case PAX_HRG: + /* + * caller will copy file data to the archive. tell him how + * much to pad. + */ + arcn->pad = BCPIO_PAD(arcn->sb.st_size); + hd->h_filesize_1[0] = CHR_WR_0(arcn->sb.st_size); + hd->h_filesize_1[1] = CHR_WR_1(arcn->sb.st_size); + hd->h_filesize_2[0] = CHR_WR_2(arcn->sb.st_size); + hd->h_filesize_2[1] = CHR_WR_3(arcn->sb.st_size); + t_offt = (off_t)(SHRT_EXT(hd->h_filesize_1)); + t_offt = (t_offt<<16) | ((off_t)(SHRT_EXT(hd->h_filesize_2))); + if (arcn->sb.st_size != t_offt) { + paxwarn(1,"File is too large for bcpio format %s", + arcn->org_name); + return(1); + } + break; + case PAX_SLK: + /* + * no file data for the caller to process, the file data has + * the size of the link + */ + arcn->pad = 0L; + hd->h_filesize_1[0] = CHR_WR_0(arcn->ln_nlen); + hd->h_filesize_1[1] = CHR_WR_1(arcn->ln_nlen); + hd->h_filesize_2[0] = CHR_WR_2(arcn->ln_nlen); + hd->h_filesize_2[1] = CHR_WR_3(arcn->ln_nlen); + t_int = (int)(SHRT_EXT(hd->h_filesize_1)); + t_int = (t_int << 16) | ((int)(SHRT_EXT(hd->h_filesize_2))); + if (arcn->ln_nlen != t_int) + goto out; + break; + default: + /* + * no file data for the caller to process + */ + arcn->pad = 0L; + hd->h_filesize_1[0] = (char)0; + hd->h_filesize_1[1] = (char)0; + hd->h_filesize_2[0] = (char)0; + hd->h_filesize_2[1] = (char)0; + break; + } + + /* + * build up the rest of the fields + */ + hd->h_magic[0] = CHR_WR_2(MAGIC); + hd->h_magic[1] = CHR_WR_3(MAGIC); + hd->h_dev[0] = CHR_WR_2(arcn->sb.st_dev); + hd->h_dev[1] = CHR_WR_3(arcn->sb.st_dev); + if (arcn->sb.st_dev != (dev_t)(SHRT_EXT(hd->h_dev))) + goto out; + hd->h_ino[0] = CHR_WR_2(arcn->sb.st_ino); + hd->h_ino[1] = CHR_WR_3(arcn->sb.st_ino); + if (arcn->sb.st_ino != (ino_t)(SHRT_EXT(hd->h_ino))) + goto out; + hd->h_mode[0] = CHR_WR_2(arcn->sb.st_mode); + hd->h_mode[1] = CHR_WR_3(arcn->sb.st_mode); + if (arcn->sb.st_mode != (mode_t)(SHRT_EXT(hd->h_mode))) + goto out; + hd->h_uid[0] = CHR_WR_2(arcn->sb.st_uid); + hd->h_uid[1] = CHR_WR_3(arcn->sb.st_uid); + if (arcn->sb.st_uid != (uid_t)(SHRT_EXT(hd->h_uid))) + goto out; + hd->h_gid[0] = CHR_WR_2(arcn->sb.st_gid); + hd->h_gid[1] = CHR_WR_3(arcn->sb.st_gid); + if (arcn->sb.st_gid != (gid_t)(SHRT_EXT(hd->h_gid))) + goto out; + hd->h_nlink[0] = CHR_WR_2(arcn->sb.st_nlink); + hd->h_nlink[1] = CHR_WR_3(arcn->sb.st_nlink); + if (arcn->sb.st_nlink != (nlink_t)(SHRT_EXT(hd->h_nlink))) + goto out; + hd->h_rdev[0] = CHR_WR_2(arcn->sb.st_rdev); + hd->h_rdev[1] = CHR_WR_3(arcn->sb.st_rdev); + if (arcn->sb.st_rdev != (dev_t)(SHRT_EXT(hd->h_rdev))) + goto out; + hd->h_mtime_1[0] = CHR_WR_0(arcn->sb.st_mtime); + hd->h_mtime_1[1] = CHR_WR_1(arcn->sb.st_mtime); + hd->h_mtime_2[0] = CHR_WR_2(arcn->sb.st_mtime); + hd->h_mtime_2[1] = CHR_WR_3(arcn->sb.st_mtime); + t_timet = (time_t)(SHRT_EXT(hd->h_mtime_1)); + t_timet = (t_timet << 16) | ((time_t)(SHRT_EXT(hd->h_mtime_2))); + if (arcn->sb.st_mtime != t_timet) + goto out; + nsz = arcn->nlen + 1; + hd->h_namesize[0] = CHR_WR_2(nsz); + hd->h_namesize[1] = CHR_WR_3(nsz); + if (nsz != (int)(SHRT_EXT(hd->h_namesize))) + goto out; + + /* + * write the header, the file name and padding as required. + */ + if ((wr_rdbuf(hdblk, (int)sizeof(HD_BCPIO)) < 0) || + (wr_rdbuf(arcn->name, nsz) < 0) || + (wr_skip((off_t)(BCPIO_PAD(sizeof(HD_BCPIO) + nsz))) < 0)) { + paxwarn(1, "Could not write bcpio header for %s", arcn->org_name); + return(-1); + } + + /* + * if we have file data, tell the caller we are done + */ + if ((arcn->type == PAX_CTG) || (arcn->type == PAX_REG) || + (arcn->type == PAX_HRG)) + return(0); + + /* + * if we are not a link, tell the caller we are done, go to next file + */ + if (arcn->type != PAX_SLK) + return(1); + + /* + * write the link name, tell the caller we are done. + */ + if ((wr_rdbuf(arcn->ln_name, arcn->ln_nlen) < 0) || + (wr_skip((off_t)(BCPIO_PAD(arcn->ln_nlen))) < 0)) { + paxwarn(1,"Could not write bcpio link name for %s",arcn->org_name); + return(-1); + } + return(1); + + out: + /* + * header field is out of range + */ + paxwarn(1,"Bcpio header field is too small for file %s", arcn->org_name); + return(1); +} diff --git a/commands/pax/cpio.h b/commands/pax/cpio.h new file mode 100644 index 000000000..66d3d0d58 --- /dev/null +++ b/commands/pax/cpio.h @@ -0,0 +1,148 @@ +/*- + * Copyright (c) 1992 Keith Muller. + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Keith Muller of the University of California, San Diego. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)cpio.h 8.1 (Berkeley) 5/31/93 + * $FreeBSD: src/bin/pax/cpio.h,v 1.7 2004/04/06 20:06:48 markm Exp $ + */ + +/* + * Defines common to all versions of cpio + */ +#define TRAILER "TRAILER!!!" /* name in last archive record */ + +/* + * Header encoding of the different file types + */ +#define C_ISDIR 040000 /* Directory */ +#define C_ISFIFO 010000 /* FIFO */ +#define C_ISREG 0100000 /* Regular file */ +#define C_ISBLK 060000 /* Block special file */ +#define C_ISCHR 020000 /* Character special file */ +#define C_ISCTG 0110000 /* Reserved for contiguous files */ +#define C_ISLNK 0120000 /* Reserved for symbolic links */ +#define C_ISOCK 0140000 /* Reserved for sockets */ +#define C_IFMT 0170000 /* type of file */ + +/* + * Data Interchange Format - Extended cpio header format - POSIX 1003.1-1990 + */ +typedef struct { + char c_magic[6]; /* magic cookie */ + char c_dev[6]; /* device number */ + char c_ino[6]; /* inode number */ + char c_mode[6]; /* file type/access */ + char c_uid[6]; /* owners uid */ + char c_gid[6]; /* owners gid */ + char c_nlink[6]; /* # of links at archive creation */ + char c_rdev[6]; /* block/char major/minor # */ + char c_mtime[11]; /* modification time */ + char c_namesize[6]; /* length of pathname */ + char c_filesize[11]; /* length of file in bytes */ +} HD_CPIO; + +#define MAGIC 070707 /* transportable archive id */ + +#ifdef _PAX_ +#define AMAGIC "070707" /* ascii equivalent string of MAGIC */ +#define CPIO_MASK 0x3ffff /* bits valid in the dev/ino fields */ + /* used for dev/inode remaps */ +#endif /* _PAX_ */ + +/* + * Binary cpio header structure + * + * CAUTION! CAUTION! CAUTION! + * Each field really represents a 16 bit short (NOT ASCII). Described as + * an array of chars in an attempt to improve portability!! + */ +typedef struct { + u_char h_magic[2]; + u_char h_dev[2]; + u_char h_ino[2]; + u_char h_mode[2]; + u_char h_uid[2]; + u_char h_gid[2]; + u_char h_nlink[2]; + u_char h_rdev[2]; + u_char h_mtime_1[2]; + u_char h_mtime_2[2]; + u_char h_namesize[2]; + u_char h_filesize_1[2]; + u_char h_filesize_2[2]; +} HD_BCPIO; + +#ifdef _PAX_ +/* + * extraction and creation macros for binary cpio + */ +#define SHRT_EXT(ch) ((((unsigned)(ch)[0])<<8) | (((unsigned)(ch)[1])&0xff)) +#define RSHRT_EXT(ch) ((((unsigned)(ch)[1])<<8) | (((unsigned)(ch)[0])&0xff)) +#define CHR_WR_0(val) ((char)(((val) >> 24) & 0xff)) +#define CHR_WR_1(val) ((char)(((val) >> 16) & 0xff)) +#define CHR_WR_2(val) ((char)(((val) >> 8) & 0xff)) +#define CHR_WR_3(val) ((char)((val) & 0xff)) + +/* + * binary cpio masks and pads + */ +#define BCPIO_PAD(x) ((2 - ((x) & 1)) & 1) /* pad to next 2 byte word */ +#define BCPIO_MASK 0xffff /* mask for dev/ino fields */ +#endif /* _PAX_ */ + +/* + * System VR4 cpio header structure (with/without file data crc) + */ +typedef struct { + char c_magic[6]; /* magic cookie */ + char c_ino[8]; /* inode number */ + char c_mode[8]; /* file type/access */ + char c_uid[8]; /* owners uid */ + char c_gid[8]; /* owners gid */ + char c_nlink[8]; /* # of links at archive creation */ + char c_mtime[8]; /* modification time */ + char c_filesize[8]; /* length of file in bytes */ + char c_maj[8]; /* block/char major # */ + char c_min[8]; /* block/char minor # */ + char c_rmaj[8]; /* special file major # */ + char c_rmin[8]; /* special file minor # */ + char c_namesize[8]; /* length of pathname */ + char c_chksum[8]; /* 0 OR CRC of bytes of FILE data */ +} HD_VCPIO; + +#define VMAGIC 070701 /* sVr4 new portable archive id */ +#define VCMAGIC 070702 /* sVr4 new portable archive id CRC */ +#ifdef _PAX_ +#define AVMAGIC "070701" /* ascii string of above */ +#define AVCMAGIC "070702" /* ascii string of above */ +#define VCPIO_PAD(x) ((4 - ((x) & 3)) & 3) /* pad to next 4 byte word */ +#define VCPIO_MASK 0xffffffff /* mask for dev/ino fields */ +#endif /* _PAX_ */ diff --git a/commands/pax/err.h b/commands/pax/err.h new file mode 100644 index 000000000..e69de29bb diff --git a/commands/pax/extern.h b/commands/pax/extern.h new file mode 100644 index 000000000..67e0eca10 --- /dev/null +++ b/commands/pax/extern.h @@ -0,0 +1,293 @@ +/*- + * Copyright (c) 1992 Keith Muller. + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Keith Muller of the University of California, San Diego. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)extern.h 8.2 (Berkeley) 4/18/94 + * $FreeBSD: src/bin/pax/extern.h,v 1.16 2005/01/12 03:25:55 brian Exp $ + */ + +/* + * External references from each source file + */ + +/* + * ar_io.c + */ +extern const char *arcname; +extern const char *gzip_program; +int ar_open(const char *); +void ar_close(void); +void ar_drain(void); +int ar_set_wr(void); +int ar_app_ok(void); +int ar_read(char *, int); +int ar_write(char *, int); +int ar_rdsync(void); +int ar_fow(off_t, off_t *); +int ar_rev(off_t ); +int ar_next(void); + +/* + * ar_subs.c + */ +extern u_long flcnt; +void list(void); +void extract(void); +void append(void); +void archive(void); +void copy(void); + +/* + * buf_subs.c + */ +extern int blksz; +extern int wrblksz; +extern int maxflt; +extern int rdblksz; +extern off_t wrlimit; +extern off_t rdcnt; +extern off_t wrcnt; +int wr_start(void); +int rd_start(void); +void cp_start(void); +int appnd_start(off_t); +int rd_sync(void); +void pback(char *, int); +int rd_skip(off_t); +void wr_fin(void); +int wr_rdbuf(char *, int); +int rd_wrbuf(char *, int); +int wr_skip(off_t); +int wr_rdfile(ARCHD *, int, off_t *); +int rd_wrfile(ARCHD *, int, off_t *); +void cp_file(ARCHD *, int, int); +int buf_fill(void); +int buf_flush(int); + +/* + * cache.c + */ +int uidtb_start(void); +int gidtb_start(void); +int usrtb_start(void); +int grptb_start(void); +const char * name_uid(uid_t, int); +const char * name_gid(gid_t, int); +int uid_name(char *, uid_t *); +int gid_name(char *, gid_t *); + +/* + * cpio.c + */ +int cpio_strd(void); +int cpio_trail(ARCHD *); +int cpio_endwr(void); +int cpio_id(char *, int); +int cpio_rd(ARCHD *, char *); +off_t cpio_endrd(void); +int cpio_stwr(void); +int cpio_wr(ARCHD *); +int vcpio_id(char *, int); +int crc_id(char *, int); +int crc_strd(void); +int vcpio_rd(ARCHD *, char *); +off_t vcpio_endrd(void); +int crc_stwr(void); +int vcpio_wr(ARCHD *); +int bcpio_id(char *, int); +int bcpio_rd(ARCHD *, char *); +off_t bcpio_endrd(void); +int bcpio_wr(ARCHD *); + +/* + * file_subs.c + */ +int file_creat(ARCHD *); +void file_close(ARCHD *, int); +int lnk_creat(ARCHD *); +int cross_lnk(ARCHD *); +int chk_same(ARCHD *); +int node_creat(ARCHD *); +int unlnk_exist(char *, int); +int chk_path(char *, uid_t, gid_t); +void set_ftime(char *fnm, time_t mtime, time_t atime, int frc); +int set_ids(char *, uid_t, gid_t); +int set_lids(char *, uid_t, gid_t); +void set_pmode(char *, mode_t); +int file_write(int, char *, int, int *, int *, int, char *); +void file_flush(int, char *, int); +void rdfile_close(ARCHD *, int *); +int set_crc(ARCHD *, int); + +/* + * ftree.c + */ +int ftree_start(void); +int ftree_add(char *, int); +void ftree_sel(ARCHD *); +void ftree_notsel(void); +void ftree_chk(void); +int next_file(ARCHD *); + +/* + * gen_subs.c + */ +void ls_list(ARCHD *, time_t, FILE *); +void ls_tty(ARCHD *); +int l_strncpy(char *, const char *, int); +u_long asc_ul(char *, int, int); +int ul_asc(u_long, char *, int, int); +#ifndef NET2_STAT +u_quad_t asc_uqd(char *, int, int); +int uqd_asc(u_quad_t, char *, int, int); +#endif + +/* + * getoldopt.c + */ +int getoldopt(int, char **, const char *); + +/* + * options.c + */ +extern FSUB fsub[]; +extern int ford[]; +void options(int, char **); +OPLIST * opt_next(void); +int opt_add(const char *); +int bad_opt(void); +char *chdname; + +/* + * pat_rep.c + */ +int rep_add(char *); +int pat_add(char *, char *); +void pat_chk(void); +int pat_sel(ARCHD *); +int pat_match(ARCHD *); +int mod_name(ARCHD *); +int set_dest(ARCHD *, char *, int); + +/* + * pax.c + */ +extern int act; +extern FSUB *frmt; +extern int cflag; +extern int cwdfd; +extern int dflag; +extern int iflag; +extern int kflag; +extern int lflag; +extern int nflag; +extern int tflag; +extern int uflag; +extern int vflag; +extern int Dflag; +extern int Hflag; +extern int Lflag; +extern int Xflag; +extern int Yflag; +extern int Zflag; +extern int vfpart; +extern int patime; +extern int pmtime; +extern int nodirs; +extern int pmode; +extern int pids; +extern int rmleadslash; +extern int exit_val; +extern int docrc; +extern char *dirptr; +extern const char *argv0; +extern FILE *listf; +extern char *tempfile; +extern char *tempbase; + +void sig_cleanup(int); + +/* + * sel_subs.c + */ +int sel_chk(ARCHD *); +int grp_add(char *); +int usr_add(char *); +int trng_add(char *); + +/* + * tables.c + */ +int lnk_start(void); +int chk_lnk(ARCHD *); +void purg_lnk(ARCHD *); +void lnk_end(void); +int ftime_start(void); +int chk_ftime(ARCHD *); +int name_start(void); +int add_name(char *, int, char *); +void sub_name(char *, int *, size_t); +int dev_start(void); +int add_dev(ARCHD *); +int map_dev(ARCHD *, u_long, u_long); +int atdir_start(void); +void atdir_end(void); +void add_atdir(char *, dev_t, ino_t, time_t, time_t); +int get_atdir(dev_t, ino_t, time_t *, time_t *); +int dir_start(void); +void add_dir(char *, int, struct stat *, int); +void proc_dir(void); +u_int st_hash(char *, int, int); + +/* + * tar.c + */ +int tar_endwr(void); +off_t tar_endrd(void); +int tar_trail(char *, int, int *); +int tar_id(char *, int); +int tar_opt(void); +int tar_rd(ARCHD *, char *); +int tar_wr(ARCHD *); +int ustar_strd(void); +int ustar_stwr(void); +int ustar_id(char *, int); +int ustar_rd(ARCHD *, char *); +int ustar_wr(ARCHD *); + +/* + * tty_subs.c + */ +int tty_init(void); +void tty_prnt(const char *, ...); +int tty_read(char *, int); +void paxwarn(int, const char *, ...); +void syswarn(int, int, const char *, ...); diff --git a/commands/pax/fgetln.c b/commands/pax/fgetln.c new file mode 100644 index 000000000..f39aa9235 --- /dev/null +++ b/commands/pax/fgetln.c @@ -0,0 +1,38 @@ + +#include +#include +#include + +char * +fgetln(FILE *fp, size_t *lenp) +{ +#define EXTRA 80 + char *buf = NULL; + int used = 0, len = 0, remain = 0, final = 0; + while(!final) { + char *b; + int r; + if(remain < EXTRA) { + int newlen; + char *newbuf; + newlen = len + EXTRA; + if(!(newbuf = realloc(buf, newlen))) { + if(buf) free(buf); + return NULL; + } + buf = newbuf; + len = newlen; + remain += EXTRA; + } + buf[used] = '\0'; + if(!fgets(buf + used, remain, fp)) + break; + r = strlen(buf+used); + used += r; + remain -= r; + len += r; + } + *lenp = len; + return buf; +} + diff --git a/commands/pax/file_subs.c b/commands/pax/file_subs.c new file mode 100644 index 000000000..30391ec60 --- /dev/null +++ b/commands/pax/file_subs.c @@ -0,0 +1,984 @@ +/*- + * Copyright (c) 1992 Keith Muller. + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Keith Muller of the University of California, San Diego. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef lint +#if 0 +static char sccsid[] = "@(#)file_subs.c 8.1 (Berkeley) 5/31/93"; +#endif +#endif /* not lint */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "pax.h" +#include "options.h" +#include "extern.h" + +static int +mk_link(char *,struct stat *,char *, int); + +/* + * routines that deal with file operations such as: creating, removing; + * and setting access modes, uid/gid and times of files + */ + +#define FILEBITS (S_ISVTX | S_IRWXU | S_IRWXG | S_IRWXO) +#define SETBITS (S_ISUID | S_ISGID) +#define ABITS (FILEBITS | SETBITS) + +/* + * file_creat() + * Create and open a file. + * Return: + * file descriptor or -1 for failure + */ + +int +file_creat(ARCHD *arcn) +{ + int fd = -1; + mode_t file_mode; + int oerrno; + + /* + * assume file doesn't exist, so just try to create it, most times this + * works. We have to take special handling when the file does exist. To + * detect this, we use O_EXCL. For example when trying to create a + * file and a character device or fifo exists with the same name, we + * can accidently open the device by mistake (or block waiting to open) + * If we find that the open has failed, then figure spend the effort to + * figure out why. This strategy was found to have better average + * performance in common use than checking the file (and the path) + * first with lstat. + */ + file_mode = arcn->sb.st_mode & FILEBITS; + if ((fd = open(arcn->name, O_WRONLY | O_CREAT | O_TRUNC | O_EXCL, + file_mode)) >= 0) + return(fd); + + /* + * the file seems to exist. First we try to get rid of it (found to be + * the second most common failure when traced). If this fails, only + * then we go to the expense to check and create the path to the file + */ + if (unlnk_exist(arcn->name, arcn->type) != 0) + return(-1); + + for (;;) { + /* + * try to open it again, if this fails, check all the nodes in + * the path and give it a final try. if chk_path() finds that + * it cannot fix anything, we will skip the last attempt + */ + if ((fd = open(arcn->name, O_WRONLY | O_CREAT | O_TRUNC, + file_mode)) >= 0) + break; + oerrno = errno; + if (nodirs || chk_path(arcn->name,arcn->sb.st_uid,arcn->sb.st_gid) < 0) { + syswarn(1, oerrno, "Unable to create %s", arcn->name); + return(-1); + } + } + return(fd); +} + +/* + * file_close() + * Close file descriptor to a file just created by pax. Sets modes, + * ownership and times as required. + * Return: + * 0 for success, -1 for failure + */ + +void +file_close(ARCHD *arcn, int fd) +{ + int res = 0; + + if (fd < 0) + return; + if (close(fd) < 0) + syswarn(0, errno, "Unable to close file descriptor on %s", + arcn->name); + + /* + * set owner/groups first as this may strip off mode bits we want + * then set file permission modes. Then set file access and + * modification times. + */ + if (pids) + res = set_ids(arcn->name, arcn->sb.st_uid, arcn->sb.st_gid); + + /* + * IMPORTANT SECURITY NOTE: + * if not preserving mode or we cannot set uid/gid, then PROHIBIT + * set uid/gid bits + */ + if (!pmode || res) + arcn->sb.st_mode &= ~(SETBITS); + if (pmode) + set_pmode(arcn->name, arcn->sb.st_mode); + if (patime || pmtime) + set_ftime(arcn->name, arcn->sb.st_mtime, arcn->sb.st_atime, 0); +} + +/* + * lnk_creat() + * Create a hard link to arcn->ln_name from arcn->name. arcn->ln_name + * must exist; + * Return: + * 0 if ok, -1 otherwise + */ + +int +lnk_creat(ARCHD *arcn) +{ + struct stat sb; + + /* + * we may be running as root, so we have to be sure that link target + * is not a directory, so we lstat and check + */ + if (lstat(arcn->ln_name, &sb) < 0) { + syswarn(1,errno,"Unable to link to %s from %s", arcn->ln_name, + arcn->name); + return(-1); + } + + if (S_ISDIR(sb.st_mode)) { + paxwarn(1, "A hard link to the directory %s is not allowed", + arcn->ln_name); + return(-1); + } + + return(mk_link(arcn->ln_name, &sb, arcn->name, 0)); +} + +/* + * cross_lnk() + * Create a hard link to arcn->org_name from arcn->name. Only used in copy + * with the -l flag. No warning or error if this does not succeed (we will + * then just create the file) + * Return: + * 1 if copy() should try to create this file node + * 0 if cross_lnk() ok, -1 for fatal flaw (like linking to self). + */ + +int +cross_lnk(ARCHD *arcn) +{ + /* + * try to make a link to original file (-l flag in copy mode). make sure + * we do not try to link to directories in case we are running as root + * (and it might succeed). + */ + if (arcn->type == PAX_DIR) + return(1); + return(mk_link(arcn->org_name, &(arcn->sb), arcn->name, 1)); +} + +/* + * chk_same() + * In copy mode if we are not trying to make hard links between the src + * and destinations, make sure we are not going to overwrite ourselves by + * accident. This slows things down a little, but we have to protect all + * those people who make typing errors. + * Return: + * 1 the target does not exist, go ahead and copy + * 0 skip it file exists (-k) or may be the same as source file + */ + +int +chk_same(ARCHD *arcn) +{ + struct stat sb; + + /* + * if file does not exist, return. if file exists and -k, skip it + * quietly + */ + if (lstat(arcn->name, &sb) < 0) + return(1); + if (kflag) + return(0); + + /* + * better make sure the user does not have src == dest by mistake + */ + if ((arcn->sb.st_dev == sb.st_dev) && (arcn->sb.st_ino == sb.st_ino)) { + paxwarn(1, "Unable to copy %s, file would overwrite itself", + arcn->name); + return(0); + } + return(1); +} + +/* + * mk_link() + * try to make a hard link between two files. if ign set, we do not + * complain. + * Return: + * 0 if successful (or we are done with this file but no error, such as + * finding the from file exists and the user has set -k). + * 1 when ign was set to indicates we could not make the link but we + * should try to copy/extract the file as that might work (and is an + * allowed option). -1 an error occurred. + */ + +static int +mk_link(char *to, struct stat *to_sb, char *from, + int ign) +{ + struct stat sb; + int oerrno; + + /* + * if from file exists, it has to be unlinked to make the link. If the + * file exists and -k is set, skip it quietly + */ + if (lstat(from, &sb) == 0) { + if (kflag) + return(0); + + /* + * make sure it is not the same file, protect the user + */ + if ((to_sb->st_dev==sb.st_dev)&&(to_sb->st_ino == sb.st_ino)) { + paxwarn(1, "Unable to link file %s to itself", to); + return(-1);; + } + + /* + * try to get rid of the file, based on the type + */ + if (S_ISDIR(sb.st_mode)) { + if (rmdir(from) < 0) { + syswarn(1, errno, "Unable to remove %s", from); + return(-1); + } + } else if (unlink(from) < 0) { + if (!ign) { + syswarn(1, errno, "Unable to remove %s", from); + return(-1); + } + return(1); + } + } + + /* + * from file is gone (or did not exist), try to make the hard link. + * if it fails, check the path and try it again (if chk_path() says to + * try again) + */ + for (;;) { + if (link(to, from) == 0) + break; + oerrno = errno; + if (!nodirs && chk_path(from, to_sb->st_uid, to_sb->st_gid) == 0) + continue; + if (!ign) { + syswarn(1, oerrno, "Could not link to %s from %s", to, + from); + return(-1); + } + return(1); + } + + /* + * all right the link was made + */ + return(0); +} + +/* + * node_creat() + * create an entry in the file system (other than a file or hard link). + * If successful, sets uid/gid modes and times as required. + * Return: + * 0 if ok, -1 otherwise + */ + +int +node_creat(ARCHD *arcn) +{ + int res; + int ign = 0; + int oerrno; + int pass = 0; + mode_t file_mode; + struct stat sb; + + /* + * create node based on type, if that fails try to unlink the node and + * try again. finally check the path and try again. As noted in the + * file and link creation routines, this method seems to exhibit the + * best performance in general use workloads. + */ + file_mode = arcn->sb.st_mode & FILEBITS; + + for (;;) { + switch(arcn->type) { + case PAX_DIR: + res = mkdir(arcn->name, file_mode); + if (ign) + res = 0; + break; + case PAX_CHR: + file_mode |= S_IFCHR; + res = mknod(arcn->name, file_mode, arcn->sb.st_rdev); + break; + case PAX_BLK: + file_mode |= S_IFBLK; + res = mknod(arcn->name, file_mode, arcn->sb.st_rdev); + break; + case PAX_FIF: + res = mkfifo(arcn->name, file_mode); + break; + case PAX_SCK: + /* + * Skip sockets, operation has no meaning under BSD + */ + paxwarn(0, + "%s skipped. Sockets cannot be copied or extracted", + arcn->name); + return(-1); + case PAX_SLK: + res = symlink(arcn->ln_name, arcn->name); + break; + case PAX_CTG: + case PAX_HLK: + case PAX_HRG: + case PAX_REG: + default: + /* + * we should never get here + */ + paxwarn(0, "%s has an unknown file type, skipping", + arcn->name); + return(-1); + } + + /* + * if we were able to create the node break out of the loop, + * otherwise try to unlink the node and try again. if that + * fails check the full path and try a final time. + */ + if (res == 0) + break; + + /* + * we failed to make the node + */ + oerrno = errno; + if ((ign = unlnk_exist(arcn->name, arcn->type)) < 0) + return(-1); + + if (++pass <= 1) + continue; + + if (nodirs || chk_path(arcn->name,arcn->sb.st_uid,arcn->sb.st_gid) < 0) { + syswarn(1, oerrno, "Could not create: %s", arcn->name); + return(-1); + } + } + + /* + * we were able to create the node. set uid/gid, modes and times + */ + if (pids) + res = ((arcn->type == PAX_SLK) ? + set_lids(arcn->name, arcn->sb.st_uid, arcn->sb.st_gid) : + set_ids(arcn->name, arcn->sb.st_uid, arcn->sb.st_gid)); + else + res = 0; + + /* + * symlinks are done now. + */ + if (arcn->type == PAX_SLK) + return(0); + + /* + * IMPORTANT SECURITY NOTE: + * if not preserving mode or we cannot set uid/gid, then PROHIBIT any + * set uid/gid bits + */ + if (!pmode || res) + arcn->sb.st_mode &= ~(SETBITS); + if (pmode) + set_pmode(arcn->name, arcn->sb.st_mode); + + if (arcn->type == PAX_DIR && strcmp(NM_CPIO, argv0) != 0) { + /* + * Dirs must be processed again at end of extract to set times + * and modes to agree with those stored in the archive. However + * to allow extract to continue, we may have to also set owner + * rights. This allows nodes in the archive that are children + * of this directory to be extracted without failure. Both time + * and modes will be fixed after the entire archive is read and + * before pax exits. + */ + if (access(arcn->name, R_OK | W_OK | X_OK) < 0) { + if (lstat(arcn->name, &sb) < 0) { + syswarn(0, errno,"Could not access %s (stat)", + arcn->name); + set_pmode(arcn->name,file_mode | S_IRWXU); + } else { + /* + * We have to add rights to the dir, so we make + * sure to restore the mode. The mode must be + * restored AS CREATED and not as stored if + * pmode is not set. + */ + set_pmode(arcn->name, + ((sb.st_mode & FILEBITS) | S_IRWXU)); + if (!pmode) + arcn->sb.st_mode = sb.st_mode; + } + + /* + * we have to force the mode to what was set here, + * since we changed it from the default as created. + */ + add_dir(arcn->name, arcn->nlen, &(arcn->sb), 1); + } else if (pmode || patime || pmtime) + add_dir(arcn->name, arcn->nlen, &(arcn->sb), 0); + } + + if (patime || pmtime) + set_ftime(arcn->name, arcn->sb.st_mtime, arcn->sb.st_atime, 0); + return(0); +} + +/* + * unlnk_exist() + * Remove node from file system with the specified name. We pass the type + * of the node that is going to replace it. When we try to create a + * directory and find that it already exists, we allow processing to + * continue as proper modes etc will always be set for it later on. + * Return: + * 0 is ok to proceed, no file with the specified name exists + * -1 we were unable to remove the node, or we should not remove it (-k) + * 1 we found a directory and we were going to create a directory. + */ + +int +unlnk_exist(char *name, int type) +{ + struct stat sb; + + /* + * the file does not exist, or -k we are done + */ + if (lstat(name, &sb) < 0) + return(0); + if (kflag) + return(-1); + + if (S_ISDIR(sb.st_mode)) { + /* + * try to remove a directory, if it fails and we were going to + * create a directory anyway, tell the caller (return a 1) + */ + if (rmdir(name) < 0) { + if (type == PAX_DIR) + return(1); + syswarn(1,errno,"Unable to remove directory %s", name); + return(-1); + } + return(0); + } + + /* + * try to get rid of all non-directory type nodes + */ + if (unlink(name) < 0) { + syswarn(1, errno, "Could not unlink %s", name); + return(-1); + } + return(0); +} + +/* + * chk_path() + * We were trying to create some kind of node in the file system and it + * failed. chk_path() makes sure the path up to the node exists and is + * writeable. When we have to create a directory that is missing along the + * path somewhere, the directory we create will be set to the same + * uid/gid as the file has (when uid and gid are being preserved). + * NOTE: this routine is a real performance loss. It is only used as a + * last resort when trying to create entries in the file system. + * Return: + * -1 when it could find nothing it is allowed to fix. + * 0 otherwise + */ + +int +chk_path( char *name, uid_t st_uid, gid_t st_gid) +{ + char *spt = name; + struct stat sb; + int retval = -1; + + /* + * watch out for paths with nodes stored directly in / (e.g. /bozo) + */ + if (*spt == '/') + ++spt; + + for(;;) { + /* + * work foward from the first / and check each part of the path + */ + spt = strchr(spt, '/'); + if (spt == NULL) + break; + *spt = '\0'; + + /* + * if it exists we assume it is a directory, it is not within + * the spec (at least it seems to read that way) to alter the + * file system for nodes NOT EXPLICITLY stored on the archive. + * If that assumption is changed, you would test the node here + * and figure out how to get rid of it (probably like some + * recursive unlink()) or fix up the directory permissions if + * required (do an access()). + */ + if (lstat(name, &sb) == 0) { + *(spt++) = '/'; + continue; + } + + /* + * the path fails at this point, see if we can create the + * needed directory and continue on + */ + if (mkdir(name, S_IRWXU | S_IRWXG | S_IRWXO) < 0) { + *spt = '/'; + retval = -1; + break; + } + + /* + * we were able to create the directory. We will tell the + * caller that we found something to fix, and it is ok to try + * and create the node again. + */ + retval = 0; + if (pids) + (void)set_ids(name, st_uid, st_gid); + + /* + * make sure the user doen't have some strange umask that + * causes this newly created directory to be unusable. We fix + * the modes and restore them back to the creation default at + * the end of pax + */ + if ((access(name, R_OK | W_OK | X_OK) < 0) && + (lstat(name, &sb) == 0)) { + set_pmode(name, ((sb.st_mode & FILEBITS) | S_IRWXU)); + add_dir(name, spt - name, &sb, 1); + } + *(spt++) = '/'; + continue; + } + return(retval); +} + +/* + * set_ftime() + * Set the access time and modification time for a named file. If frc is + * non-zero we force these times to be set even if the user did not + * request access and/or modification time preservation (this is also + * used by -t to reset access times). + * When ign is zero, only those times the user has asked for are set, the + * other ones are left alone. We do not assume the un-documented feature + * of many utimes() implementations that consider a 0 time value as a do + * not set request. + */ + +void +set_ftime(char *fnm, time_t mtime, time_t atime, int frc) +{ +#if 0 + static struct timeval tv[2] = {{0L, 0L}, {0L, 0L}}; +#endif + struct stat sb; + struct utimbuf ut; + +#if 0 + tv[0].tv_sec = atime; + tv[1].tv_sec = mtime; +#endif + + ut.actime = atime; + ut.modtime = mtime; + + if (!frc && (!patime || !pmtime)) { + /* + * if we are not forcing, only set those times the user wants + * set. We get the current values of the times if we need them. + */ + if (lstat(fnm, &sb) == 0) { + if (!patime) + ut.actime = sb.st_atime; + if (!pmtime) + ut.modtime = sb.st_mtime; + } else + syswarn(0,errno,"Unable to obtain file stats %s", fnm); + } + + /* + * set the times + */ + if (utime(fnm, &ut) < 0) + syswarn(1, errno, "Access/modification time set failed on: %s", + fnm); + return; +} + +/* + * set_ids() + * set the uid and gid of a file system node + * Return: + * 0 when set, -1 on failure + */ + +int +set_ids(char *fnm, uid_t uid, gid_t gid) +{ + if (chown(fnm, uid, gid) < 0) { + /* + * ignore EPERM unless in verbose mode or being run by root. + * if running as pax, POSIX requires a warning. + */ + if (strcmp(NM_PAX, argv0) == 0 || errno != EPERM || vflag || + geteuid() == 0) + syswarn(1, errno, "Unable to set file uid/gid of %s", + fnm); + return(-1); + } + return(0); +} + +/* + * set_lids() + * set the uid and gid of a file system node + * Return: + * 0 when set, -1 on failure + */ + +int +set_lids(char *fnm, uid_t uid, gid_t gid) +{ + if (chown(fnm, uid, gid) < 0) { + /* + * ignore EPERM unless in verbose mode or being run by root. + * if running as pax, POSIX requires a warning. + */ + if (strcmp(NM_PAX, argv0) == 0 || errno != EPERM || vflag || + geteuid() == 0) + syswarn(1, errno, "Unable to set file uid/gid of %s", + fnm); + return(-1); + } + return(0); +} + +/* + * set_pmode() + * Set file access mode + */ + +void +set_pmode(char *fnm, mode_t mode) +{ + mode &= ABITS; + if (chmod(fnm, mode) < 0) + syswarn(1, errno, "Could not set permissions on %s", fnm); + return; +} + +/* + * file_write() + * Write/copy a file (during copy or archive extract). This routine knows + * how to copy files with lseek holes in it. (Which are read as file + * blocks containing all 0's but do not have any file blocks associated + * with the data). Typical examples of these are files created by dbm + * variants (.pag files). While the file size of these files are huge, the + * actual storage is quite small (the files are sparse). The problem is + * the holes read as all zeros so are probably stored on the archive that + * way (there is no way to determine if the file block is really a hole, + * we only know that a file block of all zero's can be a hole). + * At this writing, no major archive format knows how to archive files + * with holes. However, on extraction (or during copy, -rw) we have to + * deal with these files. Without detecting the holes, the files can + * consume a lot of file space if just written to disk. This replacement + * for write when passed the basic allocation size of a file system block, + * uses lseek whenever it detects the input data is all 0 within that + * file block. In more detail, the strategy is as follows: + * While the input is all zero keep doing an lseek. Keep track of when we + * pass over file block boundries. Only write when we hit a non zero + * input. once we have written a file block, we continue to write it to + * the end (we stop looking at the input). When we reach the start of the + * next file block, start checking for zero blocks again. Working on file + * block boundries significantly reduces the overhead when copying files + * that are NOT very sparse. This overhead (when compared to a write) is + * almost below the measurement resolution on many systems. Without it, + * files with holes cannot be safely copied. It does has a side effect as + * it can put holes into files that did not have them before, but that is + * not a problem since the file contents are unchanged (in fact it saves + * file space). (Except on paging files for diskless clients. But since we + * cannot determine one of those file from here, we ignore them). If this + * ever ends up on a system where CTG files are supported and the holes + * are not desired, just do a conditional test in those routines that + * call file_write() and have it call write() instead. BEFORE CLOSING THE + * FILE, make sure to call file_flush() when the last write finishes with + * an empty block. A lot of file systems will not create an lseek hole at + * the end. In this case we drop a single 0 at the end to force the + * trailing 0's in the file. + * ---Parameters--- + * rem: how many bytes left in this file system block + * isempt: have we written to the file block yet (is it empty) + * sz: basic file block allocation size + * cnt: number of bytes on this write + * str: buffer to write + * Return: + * number of bytes written, -1 on write (or lseek) error. + */ + +int +file_write(int fd, char *str, int cnt, int *rem, int *isempt, int sz, + char *name) +{ + char *pt; + char *end; + int wcnt; + char *st = str; + + /* + * while we have data to process + */ + while (cnt) { + if (!*rem) { + /* + * We are now at the start of file system block again + * (or what we think one is...). start looking for + * empty blocks again + */ + *isempt = 1; + *rem = sz; + } + + /* + * only examine up to the end of the current file block or + * remaining characters to write, whatever is smaller + */ + wcnt = MIN(cnt, *rem); + cnt -= wcnt; + *rem -= wcnt; + if (*isempt) { + /* + * have not written to this block yet, so we keep + * looking for zero's + */ + pt = st; + end = st + wcnt; + + /* + * look for a zero filled buffer + */ + while ((pt < end) && (*pt == '\0')) + ++pt; + + if (pt == end) { + /* + * skip, buf is empty so far + */ + if (lseek(fd, (off_t)wcnt, SEEK_CUR) < 0) { + syswarn(1,errno,"File seek on %s", + name); + return(-1); + } + st = pt; + continue; + } + /* + * drat, the buf is not zero filled + */ + *isempt = 0; + } + + /* + * have non-zero data in this file system block, have to write + */ + if (write(fd, st, wcnt) != wcnt) { + syswarn(1, errno, "Failed write to file %s", name); + return(-1); + } + st += wcnt; + } + return(st - str); +} + +/* + * file_flush() + * when the last file block in a file is zero, many file systems will not + * let us create a hole at the end. To get the last block with zeros, we + * write the last BYTE with a zero (back up one byte and write a zero). + */ + +void +file_flush(int fd, char *fname, int isempt) +{ + static char blnk[] = "\0"; + + /* + * silly test, but make sure we are only called when the last block is + * filled with all zeros. + */ + if (!isempt) + return; + + /* + * move back one byte and write a zero + */ + if (lseek(fd, (off_t)-1, SEEK_CUR) < 0) { + syswarn(1, errno, "Failed seek on file %s", fname); + return; + } + + if (write(fd, blnk, 1) < 0) + syswarn(1, errno, "Failed write to file %s", fname); + return; +} + +/* + * rdfile_close() + * close a file we have beed reading (to copy or archive). If we have to + * reset access time (tflag) do so (the times are stored in arcn). + */ + +void +rdfile_close(ARCHD *arcn, int *fd) +{ + /* + * make sure the file is open + */ + if (*fd < 0) + return; + + (void)close(*fd); + *fd = -1; + if (!tflag) + return; + + /* + * user wants last access time reset + */ + set_ftime(arcn->org_name, arcn->sb.st_mtime, arcn->sb.st_atime, 1); + return; +} + +/* + * set_crc() + * read a file to calculate its crc. This is a real drag. Archive formats + * that have this, end up reading the file twice (we have to write the + * header WITH the crc before writing the file contents. Oh well... + * Return: + * 0 if was able to calculate the crc, -1 otherwise + */ + +int +set_crc(ARCHD *arcn, int fd) +{ + int i; + int res; + off_t cpcnt = 0L; + u_long size; + unsigned long crc = 0L; + char tbuf[FILEBLK]; + struct stat sb; + + if (fd < 0) { + /* + * hmm, no fd, should never happen. well no crc then. + */ + arcn->crc = 0L; + return(0); + } + +#if 0 + /* not in minix */ + if ((size = (u_long)arcn->sb.st_blksize) > (u_long)sizeof(tbuf)) + size = (u_long)sizeof(tbuf); +#endif + + /* + * read all the bytes we think that there are in the file. If the user + * is trying to archive an active file, forget this file. + */ + for(;;) { + if ((res = read(fd, tbuf, size)) <= 0) + break; + cpcnt += res; + for (i = 0; i < res; ++i) + crc += (tbuf[i] & 0xff); + } + + /* + * safety check. we want to avoid archiving files that are active as + * they can create inconsistant archive copies. + */ + if (cpcnt != arcn->sb.st_size) + paxwarn(1, "File changed size %s", arcn->org_name); + else if (fstat(fd, &sb) < 0) + syswarn(1, errno, "Failed stat on %s", arcn->org_name); + else if (arcn->sb.st_mtime != sb.st_mtime) + paxwarn(1, "File %s was modified during read", arcn->org_name); + else if (lseek(fd, (off_t)0L, SEEK_SET) < 0) + syswarn(1, errno, "File rewind failed on: %s", arcn->org_name); + else { + arcn->crc = crc; + return(0); + } + return(-1); +} diff --git a/commands/pax/ftree.c b/commands/pax/ftree.c new file mode 100644 index 000000000..083fa8f9f --- /dev/null +++ b/commands/pax/ftree.c @@ -0,0 +1,535 @@ +/*- + * Copyright (c) 1992 Keith Muller. + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Keith Muller of the University of California, San Diego. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef lint +#if 0 +static char sccsid[] = "@(#)ftree.c 8.2 (Berkeley) 4/18/94"; +#endif +#endif /* not lint */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "pax.h" +#include "ftree.h" +#include "extern.h" + +/* + * routines to interface with the fts library function. + * + * file args supplied to pax are stored on a single linked list (of type FTREE) + * and given to fts to be processed one at a time. pax "selects" files from + * the expansion of each arg into the corresponding file tree (if the arg is a + * directory, otherwise the node itself is just passed to pax). The selection + * is modified by the -n and -u flags. The user is informed when a specific + * file arg does not generate any selected files. -n keeps expanding the file + * tree arg until one of its files is selected, then skips to the next file + * arg. when the user does not supply the file trees as command line args to + * pax, they are read from stdin + */ + +static FTS *ftsp = NULL; /* current FTS handle */ +static int ftsopts; /* options to be used on fts_open */ +static char *farray[2]; /* array for passing each arg to fts */ +static FTREE *fthead = NULL; /* head of linked list of file args */ +static FTREE *fttail = NULL; /* tail of linked list of file args */ +static FTREE *ftcur = NULL; /* current file arg being processed */ +static FTSENT *ftent = NULL; /* current file tree entry */ +static int ftree_skip; /* when set skip to next file arg */ + +static int ftree_arg(void); + +/* + * ftree_start() + * initialize the options passed to fts_open() during this run of pax + * options are based on the selection of pax options by the user + * fts_start() also calls fts_arg() to open the first valid file arg. We + * also attempt to reset directory access times when -t (tflag) is set. + * Return: + * 0 if there is at least one valid file arg to process, -1 otherwise + */ + +int +ftree_start(void) +{ + /* + * Set up the operation mode of fts, open the first file arg. We must + * use FTS_NOCHDIR, as the user may have to open multiple archives and + * if fts did a chdir off into the boondocks, we may create an archive + * volume in a place where the user did not expect to. + */ + ftsopts = FTS_NOCHDIR; + + /* + * optional user flags that effect file traversal + * -H command line symlink follow only (half follow) + * -L follow sylinks (logical) + * -P do not follow sylinks (physical). This is the default. + * -X do not cross over mount points + * -t preserve access times on files read. + * -n select only the first member of a file tree when a match is found + * -d do not extract subtrees rooted at a directory arg. + */ + if (Lflag) + ftsopts |= FTS_LOGICAL; + else + ftsopts |= FTS_PHYSICAL; + if (Hflag) +# ifdef NET2_FTS + paxwarn(0, "The -H flag is not supported on this version"); +# else + ftsopts |= FTS_COMFOLLOW; +# endif + if (Xflag) + ftsopts |= FTS_XDEV; + + if ((fthead == NULL) && ((farray[0] = malloc(PAXPATHLEN+2)) == NULL)) { + paxwarn(1, "Unable to allocate memory for file name buffer"); + return(-1); + } + + if (ftree_arg() < 0) + return(-1); + if (tflag && (atdir_start() < 0)) + return(-1); + return(0); +} + +/* + * ftree_add() + * add the arg to the linked list of files to process. Each will be + * processed by fts one at a time + * Return: + * 0 if added to the linked list, -1 if failed + */ + +int +ftree_add(char *str, int chflg) +{ + FTREE *ft; + int len; + + /* + * simple check for bad args + */ + if ((str == NULL) || (*str == '\0')) { + paxwarn(0, "Invalid file name argument"); + return(-1); + } + + /* + * allocate FTREE node and add to the end of the linked list (args are + * processed in the same order they were passed to pax). Get rid of any + * trailing / the user may pass us. (watch out for / by itself). + */ + if ((ft = (FTREE *)malloc(sizeof(FTREE))) == NULL) { + paxwarn(0, "Unable to allocate memory for filename"); + return(-1); + } + + if (((len = strlen(str) - 1) > 0) && (str[len] == '/')) + str[len] = '\0'; + ft->fname = str; + ft->refcnt = 0; + ft->chflg = chflg; + ft->fow = NULL; + if (fthead == NULL) { + fttail = fthead = ft; + return(0); + } + fttail->fow = ft; + fttail = ft; + return(0); +} + +/* + * ftree_sel() + * this entry has been selected by pax. bump up reference count and handle + * -n and -d processing. + */ + +void +ftree_sel(ARCHD *arcn) +{ + /* + * set reference bit for this pattern. This linked list is only used + * when file trees are supplied pax as args. The list is not used when + * the trees are read from stdin. + */ + if (ftcur != NULL) + ftcur->refcnt = 1; + + /* + * if -n we are done with this arg, force a skip to the next arg when + * pax asks for the next file in next_file(). + * if -d we tell fts only to match the directory (if the arg is a dir) + * and not the entire file tree rooted at that point. + */ + if (nflag) + ftree_skip = 1; + + if (!dflag || (arcn->type != PAX_DIR)) + return; + + if (ftent != NULL) + (void)fts_set(ftsp, ftent, FTS_SKIP); +} + +/* + * ftree_notsel() + * this entry has not been selected by pax. + */ + +void +ftree_notsel() +{ + if (ftent != NULL) + (void)fts_set(ftsp, ftent, FTS_SKIP); +} + +/* + * ftree_chk() + * called at end on pax execution. Prints all those file args that did not + * have a selected member (reference count still 0) + */ + +void +ftree_chk(void) +{ + FTREE *ft; + int wban = 0; + + /* + * make sure all dir access times were reset. + */ + if (tflag) + atdir_end(); + + /* + * walk down list and check reference count. Print out those members + * that never had a match + */ + for (ft = fthead; ft != NULL; ft = ft->fow) { + if ((ft->refcnt > 0) || ft->chflg) + continue; + if (wban == 0) { + paxwarn(1,"WARNING! These file names were not selected:"); + ++wban; + } + (void)fprintf(stderr, "%s\n", ft->fname); + } +} + +/* + * ftree_arg() + * Get the next file arg for fts to process. Can be from either the linked + * list or read from stdin when the user did not them as args to pax. Each + * arg is processed until the first successful fts_open(). + * Return: + * 0 when the next arg is ready to go, -1 if out of file args (or EOF on + * stdin). + */ + +static int +ftree_arg(void) +{ + char *pt; + + /* + * close off the current file tree + */ + if (ftsp != NULL) { + (void)fts_close(ftsp); + ftsp = NULL; + } + + /* + * keep looping until we get a valid file tree to process. Stop when we + * reach the end of the list (or get an eof on stdin) + */ + for(;;) { + if (fthead == NULL) { + /* + * the user didn't supply any args, get the file trees + * to process from stdin; + */ + if (fgets(farray[0], PAXPATHLEN+1, stdin) == NULL) + return(-1); + if ((pt = strchr(farray[0], '\n')) != NULL) + *pt = '\0'; + } else { + /* + * the user supplied the file args as arguments to pax + */ + if (ftcur == NULL) + ftcur = fthead; + else if ((ftcur = ftcur->fow) == NULL) + return(-1); + if (ftcur->chflg) { + /* First fchdir() back... */ + if (fchdir(cwdfd) < 0) { + syswarn(1, errno, + "Can't fchdir to starting directory"); + return(-1); + } + if (chdir(ftcur->fname) < 0) { + syswarn(1, errno, "Can't chdir to %s", + ftcur->fname); + return(-1); + } + continue; + } else + farray[0] = ftcur->fname; + } + + /* + * Watch it, fts wants the file arg stored in an array of char + * ptrs, with the last one a null. We use a two element array + * and set farray[0] to point at the buffer with the file name + * in it. We cannot pass all the file args to fts at one shot + * as we need to keep a handle on which file arg generates what + * files (the -n and -d flags need this). If the open is + * successful, return a 0. + */ + if ((ftsp = fts_open(farray, ftsopts, NULL)) != NULL) + break; + } + return(0); +} + +/* + * next_file() + * supplies the next file to process in the supplied archd structure. + * Return: + * 0 when contents of arcn have been set with the next file, -1 when done. + */ + +int +next_file(ARCHD *arcn) +{ + int cnt; + time_t atime; + time_t mtime; + + /* + * ftree_sel() might have set the ftree_skip flag if the user has the + * -n option and a file was selected from this file arg tree. (-n says + * only one member is matched for each pattern) ftree_skip being 1 + * forces us to go to the next arg now. + */ + if (ftree_skip) { + /* + * clear and go to next arg + */ + ftree_skip = 0; + if (ftree_arg() < 0) + return(-1); + } + + /* + * loop until we get a valid file to process + */ + for(;;) { + if ((ftent = fts_read(ftsp)) == NULL) { + /* + * out of files in this tree, go to next arg, if none + * we are done + */ + if (ftree_arg() < 0) + return(-1); + continue; + } + + /* + * handle each type of fts_read() flag + */ + switch(ftent->fts_info) { + case FTS_D: + case FTS_DEFAULT: + case FTS_F: + case FTS_SL: + case FTS_SLNONE: + /* + * these are all ok + */ + break; + case FTS_DP: + /* + * already saw this directory. If the user wants file + * access times reset, we use this to restore the + * access time for this directory since this is the + * last time we will see it in this file subtree + * remember to force the time (this is -t on a read + * directory, not a created directory). + */ +# ifdef NET2_FTS + if (!tflag || (get_atdir(ftent->fts_statb.st_dev, + ftent->fts_statb.st_ino, &mtime, &atime) < 0)) +# else + if (!tflag || (get_atdir(ftent->fts_statp->st_dev, + ftent->fts_statp->st_ino, &mtime, &atime) < 0)) +# endif + continue; + set_ftime(ftent->fts_path, mtime, atime, 1); + continue; + case FTS_DC: + /* + * fts claims a file system cycle + */ + paxwarn(1,"File system cycle found at %s",ftent->fts_path); + continue; + case FTS_DNR: +# ifdef NET2_FTS + syswarn(1, errno, +# else + syswarn(1, ftent->fts_errno, +# endif + "Unable to read directory %s", ftent->fts_path); + continue; + case FTS_ERR: +# ifdef NET2_FTS + syswarn(1, errno, +# else + syswarn(1, ftent->fts_errno, +# endif + "File system traversal error"); + continue; + case FTS_NS: + case FTS_NSOK: +# ifdef NET2_FTS + syswarn(1, errno, +# else + syswarn(1, ftent->fts_errno, +# endif + "Unable to access %s", ftent->fts_path); + continue; + } + + /* + * ok got a file tree node to process. copy info into arcn + * structure (initialize as required) + */ + arcn->skip = 0; + arcn->pad = 0; + arcn->ln_nlen = 0; + arcn->ln_name[0] = '\0'; +# ifdef NET2_FTS + arcn->sb = ftent->fts_statb; +# else + arcn->sb = *(ftent->fts_statp); +# endif + + /* + * file type based set up and copy into the arcn struct + * SIDE NOTE: + * we try to reset the access time on all files and directories + * we may read when the -t flag is specified. files are reset + * when we close them after copying. we reset the directories + * when we are done with their file tree (we also clean up at + * end in case we cut short a file tree traversal). However + * there is no way to reset access times on symlinks. + */ + switch(S_IFMT & arcn->sb.st_mode) { + case S_IFDIR: + arcn->type = PAX_DIR; + if (!tflag) + break; + add_atdir(ftent->fts_path, arcn->sb.st_dev, + arcn->sb.st_ino, arcn->sb.st_mtime, + arcn->sb.st_atime); + break; + case S_IFCHR: + arcn->type = PAX_CHR; + break; + case S_IFBLK: + arcn->type = PAX_BLK; + break; + case S_IFREG: + /* + * only regular files with have data to store on the + * archive. all others will store a zero length skip. + * the skip field is used by pax for actual data it has + * to read (or skip over). + */ + arcn->type = PAX_REG; + arcn->skip = arcn->sb.st_size; + break; + case S_IFLNK: + arcn->type = PAX_SLK; + /* + * have to read the symlink path from the file + */ + if ((cnt = readlink(ftent->fts_path, arcn->ln_name, + PAXPATHLEN - 1)) < 0) { + syswarn(1, errno, "Unable to read symlink %s", + ftent->fts_path); + continue; + } + /* + * set link name length, watch out readlink does not + * always NUL terminate the link path + */ + arcn->ln_name[cnt] = '\0'; + arcn->ln_nlen = cnt; + break; +#if 0 + case S_IFSOCK: + /* + * under BSD storing a socket is senseless but we will + * let the format specific write function make the + * decision of what to do with it. + */ + arcn->type = PAX_SCK; + break; +#endif + case S_IFIFO: + arcn->type = PAX_FIF; + break; + } + break; + } + + /* + * copy file name, set file name length + */ + arcn->nlen = l_strncpy(arcn->name, ftent->fts_path, sizeof(arcn->name) - 1); + arcn->name[arcn->nlen] = '\0'; + arcn->org_name = ftent->fts_path; + return(0); +} diff --git a/commands/pax/ftree.h b/commands/pax/ftree.h new file mode 100644 index 000000000..fe18dcee9 --- /dev/null +++ b/commands/pax/ftree.h @@ -0,0 +1,48 @@ +/*- + * Copyright (c) 1992 Keith Muller. + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Keith Muller of the University of California, San Diego. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)ftree.h 8.1 (Berkeley) 5/31/93 + * $FreeBSD: src/bin/pax/ftree.h,v 1.7 2004/04/06 20:06:48 markm Exp $ + */ + +/* + * Data structure used by the ftree.c routines to store the file args to be + * handed to fts(). It keeps a reference count of which args generated a + * "selected" member + */ + +typedef struct ftree { + char *fname; /* file tree name */ + int refcnt; /* has tree had a selected file? */ + int chflg; /* change directory flag */ + struct ftree *fow; /* pointer to next entry on list */ +} FTREE; diff --git a/commands/pax/gen_subs.c b/commands/pax/gen_subs.c new file mode 100644 index 000000000..5785a694a --- /dev/null +++ b/commands/pax/gen_subs.c @@ -0,0 +1,414 @@ +/*- + * Copyright (c) 1992 Keith Muller. + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Keith Muller of the University of California, San Diego. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef lint +#if 0 +static char sccsid[] = "@(#)gen_subs.c 8.1 (Berkeley) 5/31/93"; +#endif +#endif /* not lint */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "pax.h" +#include "extern.h" + +/* + * a collection of general purpose subroutines used by pax + */ + +/* + * constants used by ls_list() when printing out archive members + */ +#define MODELEN 20 +#define DATELEN 64 +#define SIXMONTHS ((365 / 2) * 86400) +#define CURFRMTM "%b %e %H:%M" +#define OLDFRMTM "%b %e %Y" +#define CURFRMTD "%e %b %H:%M" +#define OLDFRMTD "%e %b %Y" +#ifndef UT_NAMESIZE +#define UT_NAMESIZE 8 +#endif +#define UT_GRPSIZE 6 + +static int d_first = -1; + +/* + * ls_list() + * list the members of an archive in ls format + */ + +void +ls_list(ARCHD *arcn, time_t now, FILE *fp) +{ + struct stat *sbp; + char f_mode[MODELEN]; + char f_date[DATELEN]; + const char *timefrmt; + + /* + * if not verbose, just print the file name + */ + if (!vflag) { + (void)fprintf(fp, "%s\n", arcn->name); + (void)fflush(fp); + return; + } + + if (d_first < 0) + d_first = 0; + /* + * user wants long mode + */ + sbp = &(arcn->sb); +#if 0 + strmode(sbp->st_mode, f_mode); +#else + strcpy(f_mode, ""); +#endif + + /* + * time format based on age compared to the time pax was started. + */ + if ((sbp->st_mtime + SIXMONTHS) <= now) + timefrmt = d_first ? OLDFRMTD : OLDFRMTM; + else + timefrmt = d_first ? CURFRMTD : CURFRMTM; + + /* + * print file mode, link count, uid, gid and time + */ +#if 0 + if (strftime(f_date,DATELEN,timefrmt,localtime(&(sbp->st_mtime))) == 0) +#endif + f_date[0] = '\0'; + (void)fprintf(fp, "%s%2u %-*s %-*s ", f_mode, sbp->st_nlink, + UT_NAMESIZE, name_uid(sbp->st_uid, 1), UT_GRPSIZE, + name_gid(sbp->st_gid, 1)); + + /* + * print device id's for devices, or sizes for other nodes + */ + if ((arcn->type == PAX_CHR) || (arcn->type == PAX_BLK)) +# ifdef NET2_STAT + (void)fprintf(fp, "%4u,%4u ", major(sbp->st_rdev), + minor(sbp->st_rdev)); +# else + (void)fprintf(fp, "%4lu,%4lu ", (unsigned long)major(sbp->st_rdev), + (unsigned long)minor(sbp->st_rdev)); +# endif + else { +# ifdef NET2_STAT + (void)fprintf(fp, "%9lu ", sbp->st_size); +# else + (void)fprintf(fp, "%9ju ", (uintmax_t)sbp->st_size); +# endif + } + + /* + * print name and link info for hard and soft links + */ + (void)fprintf(fp, "%s %s", f_date, arcn->name); + if ((arcn->type == PAX_HLK) || (arcn->type == PAX_HRG)) + (void)fprintf(fp, " == %s\n", arcn->ln_name); + else if (arcn->type == PAX_SLK) + (void)fprintf(fp, " => %s\n", arcn->ln_name); + else + (void)putc('\n', fp); + (void)fflush(fp); + return; +} + +/* + * tty_ls() + * print a short summary of file to tty. + */ + +void +ls_tty(ARCHD *arcn) +{ + char f_date[DATELEN]; + char f_mode[MODELEN]; + const char *timefrmt; + + if (d_first < 0) + d_first = 0; + + if ((arcn->sb.st_mtime + SIXMONTHS) <= time(NULL)) + timefrmt = d_first ? OLDFRMTD : OLDFRMTM; + else + timefrmt = d_first ? CURFRMTD : CURFRMTM; + + /* + * convert time to string, and print + */ +#if 0 + if (strftime(f_date, DATELEN, timefrmt, + localtime(&(arcn->sb.st_mtime))) == 0) +#endif + f_date[0] = '\0'; +#if 0 + strmode(arcn->sb.st_mode, f_mode); +#else + strcpy(f_mode, ""); +#endif + tty_prnt("%s%s %s\n", f_mode, f_date, arcn->name); + return; +} + +/* + * l_strncpy() + * copy src to dest up to len chars (stopping at first '\0'). + * when src is shorter than len, pads to len with '\0'. + * Return: + * number of chars copied. (Note this is a real performance win over + * doing a strncpy(), a strlen(), and then a possible memset()) + */ + +int +l_strncpy(char *dest, const char *src, int len) +{ + char *stop; + char *start; + + stop = dest + len; + start = dest; + while ((dest < stop) && (*src != '\0')) + *dest++ = *src++; + len = dest - start; + while (dest < stop) + *dest++ = '\0'; + return(len); +} + +/* + * asc_ul() + * convert hex/octal character string into a u_long. We do not have to + * check for overflow! (the headers in all supported formats are not large + * enough to create an overflow). + * NOTE: strings passed to us are NOT TERMINATED. + * Return: + * unsigned long value + */ + +u_long +asc_ul(char *str, int len, int base) +{ + char *stop; + u_long tval = 0; + + stop = str + len; + + /* + * skip over leading blanks and zeros + */ + while ((str < stop) && ((*str == ' ') || (*str == '0'))) + ++str; + + /* + * for each valid digit, shift running value (tval) over to next digit + * and add next digit + */ + if (base == HEX) { + while (str < stop) { + if ((*str >= '0') && (*str <= '9')) + tval = (tval << 4) + (*str++ - '0'); + else if ((*str >= 'A') && (*str <= 'F')) + tval = (tval << 4) + 10 + (*str++ - 'A'); + else if ((*str >= 'a') && (*str <= 'f')) + tval = (tval << 4) + 10 + (*str++ - 'a'); + else + break; + } + } else { + while ((str < stop) && (*str >= '0') && (*str <= '7')) + tval = (tval << 3) + (*str++ - '0'); + } + return(tval); +} + +/* + * ul_asc() + * convert an unsigned long into an hex/oct ascii string. pads with LEADING + * ascii 0's to fill string completely + * NOTE: the string created is NOT TERMINATED. + */ + +int +ul_asc(u_long val, char *str, int len, int base) +{ + char *pt; + u_long digit; + + /* + * WARNING str is not '\0' terminated by this routine + */ + pt = str + len - 1; + + /* + * do a tailwise conversion (start at right most end of string to place + * least significant digit). Keep shifting until conversion value goes + * to zero (all digits were converted) + */ + if (base == HEX) { + while (pt >= str) { + if ((digit = (val & 0xf)) < 10) + *pt-- = '0' + (char)digit; + else + *pt-- = 'a' + (char)(digit - 10); + if ((val = (val >> 4)) == (u_long)0) + break; + } + } else { + while (pt >= str) { + *pt-- = '0' + (char)(val & 0x7); + if ((val = (val >> 3)) == (u_long)0) + break; + } + } + + /* + * pad with leading ascii ZEROS. We return -1 if we ran out of space. + */ + while (pt >= str) + *pt-- = '0'; + if (val != (u_long)0) + return(-1); + return(0); +} + +#ifndef NET2_STAT +/* + * asc_uqd() + * convert hex/octal character string into a u_quad_t. We do not have to + * check for overflow! (the headers in all supported formats are not large + * enough to create an overflow). + * NOTE: strings passed to us are NOT TERMINATED. + * Return: + * u_quad_t value + */ + +u_quad_t +asc_uqd(char *str, int len, int base) +{ + char *stop; + u_quad_t tval = 0; + + stop = str + len; + + /* + * skip over leading blanks and zeros + */ + while ((str < stop) && ((*str == ' ') || (*str == '0'))) + ++str; + + /* + * for each valid digit, shift running value (tval) over to next digit + * and add next digit + */ + if (base == HEX) { + while (str < stop) { + if ((*str >= '0') && (*str <= '9')) + tval = (tval << 4) + (*str++ - '0'); + else if ((*str >= 'A') && (*str <= 'F')) + tval = (tval << 4) + 10 + (*str++ - 'A'); + else if ((*str >= 'a') && (*str <= 'f')) + tval = (tval << 4) + 10 + (*str++ - 'a'); + else + break; + } + } else { + while ((str < stop) && (*str >= '0') && (*str <= '7')) + tval = (tval << 3) + (*str++ - '0'); + } + return(tval); +} + +/* + * uqd_asc() + * convert an u_quad_t into a hex/oct ascii string. pads with LEADING + * ascii 0's to fill string completely + * NOTE: the string created is NOT TERMINATED. + */ + +int +uqd_asc(u_quad_t val, char *str, int len, int base) +{ + char *pt; + u_quad_t digit; + + /* + * WARNING str is not '\0' terminated by this routine + */ + pt = str + len - 1; + + /* + * do a tailwise conversion (start at right most end of string to place + * least significant digit). Keep shifting until conversion value goes + * to zero (all digits were converted) + */ + if (base == HEX) { + while (pt >= str) { + if ((digit = (val & 0xf)) < 10) + *pt-- = '0' + (char)digit; + else + *pt-- = 'a' + (char)(digit - 10); + if ((val = (val >> 4)) == (u_quad_t)0) + break; + } + } else { + while (pt >= str) { + *pt-- = '0' + (char)(val & 0x7); + if ((val = (val >> 3)) == (u_quad_t)0) + break; + } + } + + /* + * pad with leading ascii ZEROS. We return -1 if we ran out of space. + */ + while (pt >= str) + *pt-- = '0'; + if (val != (u_quad_t)0) + return(-1); + return(0); +} +#endif diff --git a/commands/pax/getoldopt.c b/commands/pax/getoldopt.c new file mode 100644 index 000000000..e077298c9 --- /dev/null +++ b/commands/pax/getoldopt.c @@ -0,0 +1,70 @@ +/* $OpenBSD: getoldopt.c,v 1.4 2000/01/22 20:24:51 deraadt Exp $ */ +/* $NetBSD: getoldopt.c,v 1.3 1995/03/21 09:07:28 cgd Exp $ */ + +/*- + * Plug-compatible replacement for getopt() for parsing tar-like + * arguments. If the first argument begins with "-", it uses getopt; + * otherwise, it uses the old rules used by tar, dump, and ps. + * + * Written 25 August 1985 by John Gilmore (ihnp4!hoptoad!gnu) and placed + * in the Pubic Domain for your edification and enjoyment. + */ + + +#include +#include +#include +#include +#include + +#include "pax.h" +#include "extern.h" + +int +getoldopt(int argc, char **argv, const char *optstring) +{ + static char *key; /* Points to next keyletter */ + static char use_getopt; /* !=0 if argv[1][0] was '-' */ + char c; + char *place; + + optarg = NULL; + + if (key == NULL) { /* First time */ + if (argc < 2) return EOF; + key = argv[1]; + if (*key == '-') + use_getopt++; + else + optind = 2; + } + + if (use_getopt) + return getopt(argc, argv, optstring); + + c = *key++; + if (c == '\0') { + key--; + return EOF; + } + place = strchr(optstring, c); + + if (place == NULL || c == ':') { + fprintf(stderr, "%s: unknown option %c\n", argv[0], c); + return('?'); + } + + place++; + if (*place == ':') { + if (optind < argc) { + optarg = argv[optind]; + optind++; + } else { + fprintf(stderr, "%s: %c argument missing\n", + argv[0], c); + return('?'); + } + } + + return(c); +} diff --git a/commands/pax/options.c b/commands/pax/options.c new file mode 100644 index 000000000..1f3afb0ff --- /dev/null +++ b/commands/pax/options.c @@ -0,0 +1,1579 @@ +/*- + * Copyright (c) 1992 Keith Muller. + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Keith Muller of the University of California, San Diego. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#if 0 +#ifndef lint +static char sccsid[] = "@(#)options.c 8.2 (Berkeley) 4/18/94"; +#endif /* not lint */ +#endif + + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "pax.h" +#include "options.h" +#include "cpio.h" +#include "tar.h" +#include "extern.h" + +/* + * Routines which handle command line options + */ + +static char flgch[] = FLGCH; /* list of all possible flags */ +static OPLIST *ophead = NULL; /* head for format specific options -x */ +static OPLIST *optail = NULL; /* option tail */ + +static int no_op(void); +static void printflg(unsigned int); +static int c_frmt(const void *, const void *); +static off_t str_offt(char *); +static char *getline(FILE *fp); +static void pax_options(int, char **); +static void pax_usage(void); +static void tar_options(int, char **); +static void tar_usage(void); +static void cpio_options(int, char **); +static void cpio_usage(void); + +/* errors from getline */ +#define GETLINE_FILE_CORRUPT 1 +#define GETLINE_OUT_OF_MEM 2 +static int getline_error; + + +#define GZIP_CMD "gzip" /* command to run as gzip */ +#define COMPRESS_CMD "compress" /* command to run as compress */ +#define BZIP2_CMD "bzip2" /* command to run as gzip */ + +/* + * Format specific routine table - MUST BE IN SORTED ORDER BY NAME + * (see pax.h for description of each function) + * + * name, blksz, hdsz, udev, hlk, blkagn, inhead, id, st_read, + * read, end_read, st_write, write, end_write, trail, + * rd_data, wr_data, options + */ + +FSUB fsub[] = { +/* 0: OLD BINARY CPIO */ + {"bcpio", 5120, sizeof(HD_BCPIO), 1, 0, 0, 1, bcpio_id, cpio_strd, + bcpio_rd, bcpio_endrd, cpio_stwr, bcpio_wr, cpio_endwr, cpio_trail, + NULL, rd_wrfile, wr_rdfile, bad_opt}, + +/* 1: OLD OCTAL CHARACTER CPIO */ + {"cpio", 5120, sizeof(HD_CPIO), 1, 0, 0, 1, cpio_id, cpio_strd, + cpio_rd, cpio_endrd, cpio_stwr, cpio_wr, cpio_endwr, cpio_trail, + NULL, rd_wrfile, wr_rdfile, bad_opt}, + +/* 2: SVR4 HEX CPIO */ + {"sv4cpio", 5120, sizeof(HD_VCPIO), 1, 0, 0, 1, vcpio_id, cpio_strd, + vcpio_rd, vcpio_endrd, cpio_stwr, vcpio_wr, cpio_endwr, cpio_trail, + NULL, rd_wrfile, wr_rdfile, bad_opt}, + +/* 3: SVR4 HEX CPIO WITH CRC */ + {"sv4crc", 5120, sizeof(HD_VCPIO), 1, 0, 0, 1, crc_id, crc_strd, + vcpio_rd, vcpio_endrd, crc_stwr, vcpio_wr, cpio_endwr, cpio_trail, + NULL, rd_wrfile, wr_rdfile, bad_opt}, + +/* 4: OLD TAR */ + {"tar", 10240, BLKMULT, 0, 1, BLKMULT, 0, tar_id, no_op, + tar_rd, tar_endrd, no_op, tar_wr, tar_endwr, NULL, tar_trail, + rd_wrfile, wr_rdfile, tar_opt}, + +/* 5: POSIX USTAR */ + {"ustar", 10240, BLKMULT, 0, 1, BLKMULT, 0, ustar_id, ustar_strd, + ustar_rd, tar_endrd, ustar_stwr, ustar_wr, tar_endwr, NULL, tar_trail, + rd_wrfile, wr_rdfile, bad_opt}, +}; +#define F_OCPIO 0 /* format when called as cpio -6 */ +#define F_ACPIO 1 /* format when called as cpio -c */ +#define F_CPIO 3 /* format when called as cpio */ +#define F_OTAR 4 /* format when called as tar -o */ +#define F_TAR 5 /* format when called as tar */ +#define DEFLT 5 /* default write format from list above */ + +/* + * ford is the archive search order used by get_arc() to determine what kind + * of archive we are dealing with. This helps to properly id archive formats + * some formats may be subsets of others.... + */ +int ford[] = {5, 4, 3, 2, 1, 0, -1 }; + +/* + * options() + * figure out if we are pax, tar or cpio. Call the appropriate options + * parser + */ + +void +options(int argc, char **argv) +{ + + /* + * Are we acting like pax, tar or cpio (based on argv[0]) + */ + if ((argv0 = strrchr(argv[0], '/')) != NULL) + argv0++; + else + argv0 = argv[0]; + + if (strcmp(NM_TAR, argv0) == 0) { + tar_options(argc, argv); + return; + } + else if (strcmp(NM_CPIO, argv0) == 0) { + cpio_options(argc, argv); + return; + } + /* + * assume pax as the default + */ + argv0 = NM_PAX; + pax_options(argc, argv); + return; +} + +/* + * pax_options() + * look at the user specified flags. set globals as required and check if + * the user specified a legal set of flags. If not, complain and exit + */ + +static void +pax_options(int argc, char **argv) +{ + int c; + size_t i; + unsigned int flg = 0; + unsigned int bflg = 0; + char *pt; + FSUB tmp; + + /* + * process option flags + */ + while ((c=getopt(argc,argv,"ab:cdf:iklno:p:rs:tuvwx:zB:DE:G:HLPT:U:XYZ")) + != -1) { + switch (c) { + case 'a': + /* + * append + */ + flg |= AF; + break; + case 'b': + /* + * specify blocksize + */ + flg |= BF; + if ((wrblksz = (int)str_offt(optarg)) <= 0) { + paxwarn(1, "Invalid block size %s", optarg); + pax_usage(); + } + break; + case 'c': + /* + * inverse match on patterns + */ + cflag = 1; + flg |= CF; + break; + case 'd': + /* + * match only dir on extract, not the subtree at dir + */ + dflag = 1; + flg |= DF; + break; + case 'f': + /* + * filename where the archive is stored + */ + arcname = optarg; + flg |= FF; + break; + case 'i': + /* + * interactive file rename + */ + iflag = 1; + flg |= IF; + break; + case 'k': + /* + * do not clobber files that exist + */ + kflag = 1; + flg |= KF; + break; + case 'l': + /* + * try to link src to dest with copy (-rw) + */ + lflag = 1; + flg |= LF; + break; + case 'n': + /* + * select first match for a pattern only + */ + nflag = 1; + flg |= NF; + break; + case 'o': + /* + * pass format specific options + */ + flg |= OF; + if (opt_add(optarg) < 0) + pax_usage(); + break; + case 'p': + /* + * specify file characteristic options + */ + for (pt = optarg; *pt != '\0'; ++pt) { + switch(*pt) { + case 'a': + /* + * do not preserve access time + */ + patime = 0; + break; + case 'e': + /* + * preserve user id, group id, file + * mode, access/modification times + */ + pids = 1; + pmode = 1; + patime = 1; + pmtime = 1; + break; + case 'm': + /* + * do not preserve modification time + */ + pmtime = 0; + break; + case 'o': + /* + * preserve uid/gid + */ + pids = 1; + break; + case 'p': + /* + * preserver file mode bits + */ + pmode = 1; + break; + default: + paxwarn(1, "Invalid -p string: %c", *pt); + pax_usage(); + break; + } + } + flg |= PF; + break; + case 'r': + /* + * read the archive + */ + flg |= RF; + break; + case 's': + /* + * file name substitution name pattern + */ + if (rep_add(optarg) < 0) { + pax_usage(); + break; + } + flg |= SF; + break; + case 't': + /* + * preserve access time on file system nodes we read + */ + tflag = 1; + flg |= TF; + break; + case 'u': + /* + * ignore those older files + */ + uflag = 1; + flg |= UF; + break; + case 'v': + /* + * verbose operation mode + */ + vflag = 1; + flg |= VF; + break; + case 'w': + /* + * write an archive + */ + flg |= WF; + break; + case 'x': + /* + * specify an archive format on write + */ + tmp.name = optarg; + if ((frmt = (FSUB *)bsearch((void *)&tmp, (void *)fsub, + sizeof(fsub)/sizeof(FSUB), sizeof(FSUB), c_frmt)) != NULL) { + flg |= XF; + break; + } + paxwarn(1, "Unknown -x format: %s", optarg); + (void)fputs("pax: Known -x formats are:", stderr); + for (i = 0; i < (sizeof(fsub)/sizeof(FSUB)); ++i) + (void)fprintf(stderr, " %s", fsub[i].name); + (void)fputs("\n\n", stderr); + pax_usage(); + break; + case 'z': + /* + * use gzip. Non standard option. + */ + gzip_program = GZIP_CMD; + break; + case 'B': + /* + * non-standard option on number of bytes written on a + * single archive volume. + */ + if ((wrlimit = str_offt(optarg)) <= 0) { + paxwarn(1, "Invalid write limit %s", optarg); + pax_usage(); + } + if (wrlimit % BLKMULT) { + paxwarn(1, "Write limit is not a %d byte multiple", + BLKMULT); + pax_usage(); + } + flg |= CBF; + break; + case 'D': + /* + * On extraction check file inode change time before the + * modification of the file name. Non standard option. + */ + Dflag = 1; + flg |= CDF; + break; + case 'E': + /* + * non-standard limit on read faults + * 0 indicates stop after first error, values + * indicate a limit, "NONE" try forever + */ + flg |= CEF; + if (strcmp(NONE, optarg) == 0) + maxflt = -1; + else if ((maxflt = atoi(optarg)) < 0) { + paxwarn(1, "Error count value must be positive"); + pax_usage(); + } + break; + case 'G': + /* + * non-standard option for selecting files within an + * archive by group (gid or name) + */ + if (grp_add(optarg) < 0) { + pax_usage(); + break; + } + flg |= CGF; + break; + case 'H': + /* + * follow command line symlinks only + */ + Hflag = 1; + flg |= CHF; + break; + case 'L': + /* + * follow symlinks + */ + Lflag = 1; + flg |= CLF; + break; + case 'P': + /* + * do NOT follow symlinks (default) + */ + Lflag = 0; + flg |= CPF; + break; + case 'T': + /* + * non-standard option for selecting files within an + * archive by modification time range (lower,upper) + */ + if (trng_add(optarg) < 0) { + pax_usage(); + break; + } + flg |= CTF; + break; + case 'U': + /* + * non-standard option for selecting files within an + * archive by user (uid or name) + */ + if (usr_add(optarg) < 0) { + pax_usage(); + break; + } + flg |= CUF; + break; + case 'X': + /* + * do not pass over mount points in the file system + */ + Xflag = 1; + flg |= CXF; + break; + case 'Y': + /* + * On extraction check file inode change time after the + * modification of the file name. Non standard option. + */ + Yflag = 1; + flg |= CYF; + break; + case 'Z': + /* + * On extraction check modification time after the + * modification of the file name. Non standard option. + */ + Zflag = 1; + flg |= CZF; + break; + default: + pax_usage(); + break; + } + } + + /* + * figure out the operation mode of pax read,write,extract,copy,append + * or list. check that we have not been given a bogus set of flags + * for the operation mode. + */ + if (ISLIST(flg)) { + act = LIST; + listf = stdout; + bflg = flg & BDLIST; + } else if (ISEXTRACT(flg)) { + act = EXTRACT; + bflg = flg & BDEXTR; + } else if (ISARCHIVE(flg)) { + act = ARCHIVE; + bflg = flg & BDARCH; + } else if (ISAPPND(flg)) { + act = APPND; + bflg = flg & BDARCH; + } else if (ISCOPY(flg)) { + act = COPY; + bflg = flg & BDCOPY; + } else + pax_usage(); + if (bflg) { + printflg(flg); + pax_usage(); + } + + /* + * if we are writing (ARCHIVE) we use the default format if the user + * did not specify a format. when we write during an APPEND, we will + * adopt the format of the existing archive if none was supplied. + */ + if (!(flg & XF) && (act == ARCHIVE)) + frmt = &(fsub[DEFLT]); + + /* + * process the args as they are interpreted by the operation mode + */ + switch (act) { + case LIST: + case EXTRACT: + for (; optind < argc; optind++) + if (pat_add(argv[optind], NULL) < 0) + pax_usage(); + break; + case COPY: + if (optind >= argc) { + paxwarn(0, "Destination directory was not supplied"); + pax_usage(); + } + --argc; + dirptr = argv[argc]; + /* FALLTHROUGH */ + case ARCHIVE: + case APPND: + for (; optind < argc; optind++) + if (ftree_add(argv[optind], 0) < 0) + pax_usage(); + /* + * no read errors allowed on updates/append operation! + */ + maxflt = 0; + break; + } +} + + +/* + * tar_options() + * look at the user specified flags. set globals as required and check if + * the user specified a legal set of flags. If not, complain and exit + */ + +static void +tar_options(int argc, char **argv) +{ + int c; + int fstdin = 0; + int Oflag = 0; + int nincfiles = 0; + int incfiles_max = 0; + struct incfile { + char *file; + char *dir; + }; + struct incfile *incfiles = NULL; + + /* + * Set default values. + */ + rmleadslash = 1; + + /* + * process option flags + */ + while ((c = getoldopt(argc, argv, + "b:cef:hjmopqruts:vwxyzBC:HI:LOPXZ014578")) != -1) { + switch(c) { + case 'b': + /* + * specify blocksize in 512-byte blocks + */ + if ((wrblksz = (int)str_offt(optarg)) <= 0) { + paxwarn(1, "Invalid block size %s", optarg); + tar_usage(); + } + wrblksz *= 512; /* XXX - check for int oflow */ + break; + case 'c': + /* + * create an archive + */ + act = ARCHIVE; + break; + case 'e': + /* + * stop after first error + */ + maxflt = 0; + break; + case 'f': + /* + * filename where the archive is stored + */ + if ((optarg[0] == '-') && (optarg[1]== '\0')) { + /* + * treat a - as stdin + */ + fstdin = 1; + arcname = NULL; + break; + } + fstdin = 0; + arcname = optarg; + break; + case 'h': + /* + * follow symlinks + */ + Lflag = 1; + break; + case 'j': + case 'y': + /* + * use bzip2. Non standard option. + */ + gzip_program = BZIP2_CMD; + break; + case 'm': + /* + * do not preserve modification time + */ + pmtime = 0; + break; + case 'o': + if (opt_add("write_opt=nodir") < 0) + tar_usage(); + case 'O': + Oflag = 1; + break; + case 'p': + /* + * preserve uid/gid and file mode, regardless of umask + */ + pmode = 1; + pids = 1; + break; + case 'q': + /* + * select first match for a pattern only + */ + nflag = 1; + break; + case 'r': + case 'u': + /* + * append to the archive + */ + act = APPND; + break; + case 's': + /* + * file name substitution name pattern + */ + if (rep_add(optarg) < 0) { + tar_usage(); + break; + } + break; + case 't': + /* + * list contents of the tape + */ + act = LIST; + break; + case 'v': + /* + * verbose operation mode + */ + vflag++; + break; + case 'w': + /* + * interactive file rename + */ + iflag = 1; + break; + case 'x': + /* + * extract an archive, preserving mode, + * and mtime if possible. + */ + act = EXTRACT; + pmtime = 1; + break; + case 'z': + /* + * use gzip. Non standard option. + */ + gzip_program = GZIP_CMD; + break; + case 'B': + /* + * Nothing to do here, this is pax default + */ + break; + case 'C': + chdname = optarg; + break; + case 'H': + /* + * follow command line symlinks only + */ + Hflag = 1; + break; + case 'I': + if (++nincfiles > incfiles_max) { + incfiles_max = nincfiles + 3; + incfiles = realloc(incfiles, + sizeof(*incfiles) * incfiles_max); + if (incfiles == NULL) { + paxwarn(0, "Unable to allocate space " + "for option list"); + exit(1); + } + } + incfiles[nincfiles - 1].file = optarg; + incfiles[nincfiles - 1].dir = chdname; + break; + case 'L': + /* + * follow symlinks + */ + Lflag = 1; + break; + case 'P': + /* + * do not remove leading '/' from pathnames + */ + rmleadslash = 0; + break; + case 'X': + /* + * do not pass over mount points in the file system + */ + Xflag = 1; + break; + case 'Z': + /* + * use compress. + */ + gzip_program = COMPRESS_CMD; + break; + case '0': + arcname = DEV_0; + break; + case '1': + arcname = DEV_1; + break; + case '4': + arcname = DEV_4; + break; + case '5': + arcname = DEV_5; + break; + case '7': + arcname = DEV_7; + break; + case '8': + arcname = DEV_8; + break; + default: + tar_usage(); + break; + } + } + argc -= optind; + argv += optind; + + /* Traditional tar behaviour (pax uses stderr unless in list mode) */ + if (fstdin == 1 && act == ARCHIVE) + listf = stderr; + else + listf = stdout; + + /* Traditional tar behaviour (pax wants to read file list from stdin) */ + if ((act == ARCHIVE || act == APPND) && argc == 0 && nincfiles == 0) + exit(0); + + /* + * if we are writing (ARCHIVE) specify tar, otherwise run like pax + * (unless -o specified) + */ + if (act == ARCHIVE || act == APPND) + frmt = &(fsub[Oflag ? F_OTAR : F_TAR]); + else if (Oflag) { + paxwarn(1, "The -O/-o options are only valid when writing an archive"); + tar_usage(); /* only valid when writing */ + } + + /* + * process the args as they are interpreted by the operation mode + */ + switch (act) { + case LIST: + case EXTRACT: + default: + { + int sawpat = 0; + char *file, *dir = NULL; + + while (nincfiles || *argv != NULL) { + /* + * If we queued up any include files, + * pull them in now. Otherwise, check + * for -I and -C positional flags. + * Anything else must be a file to + * extract. + */ + if (nincfiles) { + file = incfiles->file; + dir = incfiles->dir; + incfiles++; + nincfiles--; + } else if (strcmp(*argv, "-I") == 0) { + if (*++argv == NULL) + break; + file = *argv++; + dir = chdname; + } else + file = NULL; + if (file != NULL) { + FILE *fp; + char *str; + + if (strcmp(file, "-") == 0) + fp = stdin; + else if ((fp = fopen(file, "r")) == NULL) { + paxwarn(1, "Unable to open file '%s' for read", file); + tar_usage(); + } + while ((str = getline(fp)) != NULL) { + if (pat_add(str, dir) < 0) + tar_usage(); + sawpat = 1; + } + if (strcmp(file, "-") != 0) + fclose(fp); + if (getline_error) { + paxwarn(1, "Problem with file '%s'", file); + tar_usage(); + } + } else if (strcmp(*argv, "-C") == 0) { + if (*++argv == NULL) + break; + chdname = *argv++; + } else if (pat_add(*argv++, chdname) < 0) + tar_usage(); + else + sawpat = 1; + } + /* + * if patterns were added, we are doing chdir() + * on a file-by-file basis, else, just one + * global chdir (if any) after opening input. + */ + if (sawpat > 0) + chdname = NULL; + } + break; + case ARCHIVE: + case APPND: + if (chdname != NULL) { /* initial chdir() */ + if (ftree_add(chdname, 1) < 0) + tar_usage(); + } + + while (nincfiles || *argv != NULL) { + char *file, *dir = NULL; + + /* + * If we queued up any include files, pull them in + * now. Otherwise, check for -I and -C positional + * flags. Anything else must be a file to include + * in the archive. + */ + if (nincfiles) { + file = incfiles->file; + dir = incfiles->dir; + incfiles++; + nincfiles--; + } else if (strcmp(*argv, "-I") == 0) { + if (*++argv == NULL) + break; + file = *argv++; + dir = NULL; + } else + file = NULL; + if (file != NULL) { + FILE *fp; + char *str; + + /* Set directory if needed */ + if (dir) { + if (ftree_add(dir, 1) < 0) + tar_usage(); + } + + if (strcmp(file, "-") == 0) + fp = stdin; + else if ((fp = fopen(file, "r")) == NULL) { + paxwarn(1, "Unable to open file '%s' for read", file); + tar_usage(); + } + while ((str = getline(fp)) != NULL) { + if (ftree_add(str, 0) < 0) + tar_usage(); + } + if (strcmp(file, "-") != 0) + fclose(fp); + if (getline_error) { + paxwarn(1, "Problem with file '%s'", + file); + tar_usage(); + } + } else if (strcmp(*argv, "-C") == 0) { + if (*++argv == NULL) + break; + if (ftree_add(*argv++, 1) < 0) + tar_usage(); + } else if (ftree_add(*argv++, 0) < 0) + tar_usage(); + } + /* + * no read errors allowed on updates/append operation! + */ + maxflt = 0; + break; + } + if (!fstdin && ((arcname == NULL) || (*arcname == '\0'))) { + arcname = getenv("TAPE"); + if ((arcname == NULL) || (*arcname == '\0')) + arcname = _PATH_DEFTAPE; + } +} + +static int +mkpath(char *path) +{ + struct stat sb; + char *slash; + int done = 0; + + slash = path; + + while (!done) { + slash += strspn(slash, "/"); + slash += strcspn(slash, "/"); + + done = (*slash == '\0'); + *slash = '\0'; + + if (stat(path, &sb)) { + if (errno != ENOENT || mkdir(path, 0777)) { + paxwarn(1, "%s", path); + return (-1); + } + } else if (!S_ISDIR(sb.st_mode)) { + syswarn(1, ENOTDIR, "%s", path); + return (-1); + } + + if (!done) + *slash = '/'; + } + + return (0); +} +/* + * cpio_options() + * look at the user specified flags. set globals as required and check if + * the user specified a legal set of flags. If not, complain and exit + */ + +static void +cpio_options(int argc, char **argv) +{ + int c; + size_t i; + char *str; + FSUB tmp; + FILE *fp; + + kflag = 1; + pids = 1; + pmode = 1; + pmtime = 0; + arcname = NULL; + dflag = 1; + act = -1; + nodirs = 1; + while ((c=getopt(argc,argv,"abcdfiklmoprstuvzABC:E:F:H:I:LO:SZ6")) != -1) + switch (c) { + case 'a': + /* + * preserve access time on files read + */ + tflag = 1; + break; + case 'b': + /* + * swap bytes and half-words when reading data + */ + break; + case 'c': + /* + * ASCII cpio header + */ + frmt = &(fsub[F_ACPIO]); + break; + case 'd': + /* + * create directories as needed + */ + nodirs = 0; + break; + case 'f': + /* + * invert meaning of pattern list + */ + cflag = 1; + break; + case 'i': + /* + * restore an archive + */ + act = EXTRACT; + break; + case 'k': + break; + case 'l': + /* + * use links instead of copies when possible + */ + lflag = 1; + break; + case 'm': + /* + * preserve modification time + */ + pmtime = 1; + break; + case 'o': + /* + * create an archive + */ + act = ARCHIVE; + frmt = &(fsub[F_CPIO]); + break; + case 'p': + /* + * copy-pass mode + */ + act = COPY; + break; + case 'r': + /* + * interactively rename files + */ + iflag = 1; + break; + case 's': + /* + * swap bytes after reading data + */ + break; + case 't': + /* + * list contents of archive + */ + act = LIST; + listf = stdout; + break; + case 'u': + /* + * replace newer files + */ + kflag = 0; + break; + case 'v': + /* + * verbose operation mode + */ + vflag = 1; + break; + case 'z': + /* + * use gzip. Non standard option. + */ + gzip_program = GZIP_CMD; + break; + case 'A': + /* + * append mode + */ + act = APPND; + break; + case 'B': + /* + * Use 5120 byte block size + */ + wrblksz = 5120; + break; + case 'C': + /* + * set block size in bytes + */ + wrblksz = atoi(optarg); + break; + case 'E': + /* + * file with patterns to extract or list + */ + if ((fp = fopen(optarg, "r")) == NULL) { + paxwarn(1, "Unable to open file '%s' for read", optarg); + cpio_usage(); + } + while ((str = getline(fp)) != NULL) { + pat_add(str, NULL); + } + fclose(fp); + if (getline_error) { + paxwarn(1, "Problem with file '%s'", optarg); + cpio_usage(); + } + break; + case 'F': + case 'I': + case 'O': + /* + * filename where the archive is stored + */ + if ((optarg[0] == '-') && (optarg[1]== '\0')) { + /* + * treat a - as stdin + */ + arcname = NULL; + break; + } + arcname = optarg; + break; + case 'H': + /* + * specify an archive format on write + */ + tmp.name = optarg; + if ((frmt = (FSUB *)bsearch((void *)&tmp, (void *)fsub, + sizeof(fsub)/sizeof(FSUB), sizeof(FSUB), c_frmt)) != NULL) + break; + paxwarn(1, "Unknown -H format: %s", optarg); + (void)fputs("cpio: Known -H formats are:", stderr); + for (i = 0; i < (sizeof(fsub)/sizeof(FSUB)); ++i) + (void)fprintf(stderr, " %s", fsub[i].name); + (void)fputs("\n\n", stderr); + cpio_usage(); + break; + case 'L': + /* + * follow symbolic links + */ + Lflag = 1; + break; + case 'S': + /* + * swap halfwords after reading data + */ + break; + case 'Z': + /* + * use compress. Non standard option. + */ + gzip_program = COMPRESS_CMD; + break; + case '6': + /* + * process Version 6 cpio format + */ + frmt = &(fsub[F_OCPIO]); + break; + case '?': + default: + cpio_usage(); + break; + } + argc -= optind; + argv += optind; + + /* + * process the args as they are interpreted by the operation mode + */ + switch (act) { + case LIST: + case EXTRACT: + while (*argv != NULL) + if (pat_add(*argv++, NULL) < 0) + cpio_usage(); + break; + case COPY: + if (*argv == NULL) { + paxwarn(0, "Destination directory was not supplied"); + cpio_usage(); + } + dirptr = *argv; + if (mkpath(dirptr) < 0) + cpio_usage(); + --argc; + ++argv; + /* FALLTHROUGH */ + case ARCHIVE: + case APPND: + if (*argv != NULL) + cpio_usage(); + /* + * no read errors allowed on updates/append operation! + */ + maxflt = 0; + while ((str = getline(stdin)) != NULL) { + ftree_add(str, 0); + } + if (getline_error) { + paxwarn(1, "Problem while reading stdin"); + cpio_usage(); + } + break; + default: + cpio_usage(); + break; + } +} + +/* + * printflg() + * print out those invalid flag sets found to the user + */ + +static void +printflg(unsigned int flg) +{ + int nxt; + int pos = 0; + + (void)fprintf(stderr,"%s: Invalid combination of options:", argv0); + while ((nxt = ffs(flg)) != 0) { + flg = flg >> nxt; + pos += nxt; + (void)fprintf(stderr, " -%c", flgch[pos-1]); + } + (void)putc('\n', stderr); +} + +/* + * c_frmt() + * comparison routine used by bsearch to find the format specified + * by the user + */ + +static int +c_frmt(const void *a, const void *b) +{ + return(strcmp(((const FSUB *)a)->name, ((const FSUB *)b)->name)); +} + +/* + * opt_next() + * called by format specific options routines to get each format specific + * flag and value specified with -o + * Return: + * pointer to next OPLIST entry or NULL (end of list). + */ + +OPLIST * +opt_next(void) +{ + OPLIST *opt; + + if ((opt = ophead) != NULL) + ophead = ophead->fow; + return(opt); +} + +/* + * bad_opt() + * generic routine used to complain about a format specific options + * when the format does not support options. + */ + +int +bad_opt(void) +{ + OPLIST *opt; + + if (ophead == NULL) + return(0); + /* + * print all we were given + */ + paxwarn(1,"These format options are not supported"); + while ((opt = opt_next()) != NULL) + (void)fprintf(stderr, "\t%s = %s\n", opt->name, opt->value); + pax_usage(); + return(0); +} + +/* + * opt_add() + * breaks the value supplied to -o into an option name and value. Options + * are given to -o in the form -o name-value,name=value + * multiple -o may be specified. + * Return: + * 0 if format in name=value format, -1 if -o is passed junk. + */ + +int +opt_add(const char *str) +{ + OPLIST *opt; + char *frpt; + char *pt; + char *endpt; + char *lstr; + + if ((str == NULL) || (*str == '\0')) { + paxwarn(0, "Invalid option name"); + return(-1); + } + if ((lstr = strdup(str)) == NULL) { + paxwarn(0, "Unable to allocate space for option list"); + return(-1); + } + frpt = endpt = lstr; + + /* + * break into name and values pieces and stuff each one into a + * OPLIST structure. When we know the format, the format specific + * option function will go through this list + */ + while ((frpt != NULL) && (*frpt != '\0')) { + if ((endpt = strchr(frpt, ',')) != NULL) + *endpt = '\0'; + if ((pt = strchr(frpt, '=')) == NULL) { + paxwarn(0, "Invalid options format"); + free(lstr); + return(-1); + } + if ((opt = (OPLIST *)malloc(sizeof(OPLIST))) == NULL) { + paxwarn(0, "Unable to allocate space for option list"); + free(lstr); + return(-1); + } + *pt++ = '\0'; + opt->name = frpt; + opt->value = pt; + opt->fow = NULL; + if (endpt != NULL) + frpt = endpt + 1; + else + frpt = NULL; + if (ophead == NULL) { + optail = ophead = opt; + continue; + } + optail->fow = opt; + optail = opt; + } + return(0); +} + +/* + * str_offt() + * Convert an expression of the following forms to an off_t > 0. + * 1) A positive decimal number. + * 2) A positive decimal number followed by a b (mult by 512). + * 3) A positive decimal number followed by a k (mult by 1024). + * 4) A positive decimal number followed by a m (mult by 512). + * 5) A positive decimal number followed by a w (mult by sizeof int) + * 6) Two or more positive decimal numbers (with/without k,b or w). + * separated by x (also * for backwards compatibility), specifying + * the product of the indicated values. + * Return: + * 0 for an error, a positive value o.w. + */ + +static off_t +str_offt(char *val) +{ + char *expr; + off_t num, t; + +# ifdef NET2_STAT + num = strtol(val, &expr, 0); + if ((num == LONG_MAX) || (num <= 0) || (expr == val)) +# else + num = strtoq(val, &expr, 0); + if ((num == QUAD_MAX) || (num <= 0) || (expr == val)) +# endif + return(0); + + switch(*expr) { + case 'b': + t = num; + num *= 512; + if (t > num) + return(0); + ++expr; + break; + case 'k': + t = num; + num *= 1024; + if (t > num) + return(0); + ++expr; + break; + case 'm': + t = num; + num *= 1048576; + if (t > num) + return(0); + ++expr; + break; + case 'w': + t = num; + num *= sizeof(int); + if (t > num) + return(0); + ++expr; + break; + } + + switch(*expr) { + case '\0': + break; + case '*': + case 'x': + t = num; + num *= str_offt(expr + 1); + if (t > num) + return(0); + break; + default: + return(0); + } + return(num); +} + +char * +getline(FILE *f) +{ + char *name, *temp; + size_t len; + + name = fgetln(f, &len); + if (!name) { + getline_error = ferror(f) ? GETLINE_FILE_CORRUPT : 0; + return(0); + } + if (name[len-1] != '\n') + len++; + temp = malloc(len); + if (!temp) { + getline_error = GETLINE_OUT_OF_MEM; + return(0); + } + memcpy(temp, name, len-1); + temp[len-1] = 0; + return(temp); +} + +/* + * no_op() + * for those option functions where the archive format has nothing to do. + * Return: + * 0 + */ + +static int +no_op(void) +{ + return(0); +} + +/* + * pax_usage() + * print the usage summary to the user + */ + +void +pax_usage(void) +{ + (void)fputs("usage: pax [-cdnvz] [-E limit] [-f archive] ", stderr); + (void)fputs("[-s replstr] ... [-U user] ...", stderr); + (void)fputs("\n [-G group] ... ", stderr); + (void)fputs("[-T [from_date][,to_date]] ... ", stderr); + (void)fputs("[pattern ...]\n", stderr); + (void)fputs(" pax -r [-cdiknuvzDYZ] [-E limit] ", stderr); + (void)fputs("[-f archive] [-o options] ... \n", stderr); + (void)fputs(" [-p string] ... [-s replstr] ... ", stderr); + (void)fputs("[-U user] ... [-G group] ...\n ", stderr); + (void)fputs("[-T [from_date][,to_date]] ... ", stderr); + (void)fputs(" [pattern ...]\n", stderr); + (void)fputs(" pax -w [-dituvzHLPX] [-b blocksize] ", stderr); + (void)fputs("[ [-a] [-f archive] ] [-x format] \n", stderr); + (void)fputs(" [-B bytes] [-s replstr] ... ", stderr); + (void)fputs("[-o options] ... [-U user] ...", stderr); + (void)fputs("\n [-G group] ... ", stderr); + (void)fputs("[-T [from_date][,to_date][/[c][m]]] ... ", stderr); + (void)fputs("[file ...]\n", stderr); + (void)fputs(" pax -r -w [-diklntuvDHLPXYZ] ", stderr); + (void)fputs("[-p string] ... [-s replstr] ...", stderr); + (void)fputs("\n [-U user] ... [-G group] ... ", stderr); + (void)fputs("[-T [from_date][,to_date][/[c][m]]] ... ", stderr); + (void)fputs("\n [file ...] directory\n", stderr); + exit(1); +} + +/* + * tar_usage() + * print the usage summary to the user + */ + +void +tar_usage(void) +{ + (void)fputs("usage: tar [-]{crtux}[-befhjmopqsvwyzHLOPXZ014578] [blocksize] ", + stderr); + (void)fputs("[archive] [replstr] [-C directory] [-I file] [file ...]\n", + stderr); + exit(1); +} + +/* + * cpio_usage() + * print the usage summary to the user + */ + +void +cpio_usage(void) +{ + (void)fputs("usage: cpio -o [-aABcLvVzZ] [-C bytes] [-H format] [-O archive]\n", stderr); + (void)fputs(" [-F archive] < name-list [> archive]\n", stderr); + (void)fputs(" cpio -i [-bBcdfmnrsStuvVzZ6] [-C bytes] [-E file] [-H format]\n", stderr); + (void)fputs(" [-I archive] [-F archive] [pattern...] [< archive]\n", stderr); + (void)fputs(" cpio -p [-adlLmuvV] destination-directory < name-list\n", stderr); + exit(1); +} diff --git a/commands/pax/options.h b/commands/pax/options.h new file mode 100644 index 000000000..86351e859 --- /dev/null +++ b/commands/pax/options.h @@ -0,0 +1,110 @@ +/*- + * Copyright (c) 1992 Keith Muller. + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Keith Muller of the University of California, San Diego. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)options.h 8.2 (Berkeley) 4/18/94 + * $FreeBSD: src/bin/pax/options.h,v 1.6 2004/04/06 20:06:48 markm Exp $ + */ + +/* + * argv[0] names. Used for tar and cpio emulation + */ + +#define NM_TAR "tar" +#define NM_CPIO "cpio" +#define NM_PAX "pax" + +/* + * Constants used to specify the legal sets of flags in pax. For each major + * operation mode of pax, a set of illegal flags is defined. If any one of + * those illegal flags are found set, we scream and exit + */ +#define NONE "none" + +/* + * flags (one for each option). + */ +#define AF 0x00000001 +#define BF 0x00000002 +#define CF 0x00000004 +#define DF 0x00000008 +#define FF 0x00000010 +#define IF 0x00000020 +#define KF 0x00000040 +#define LF 0x00000080 +#define NF 0x00000100 +#define OF 0x00000200 +#define PF 0x00000400 +#define RF 0x00000800 +#define SF 0x00001000 +#define TF 0x00002000 +#define UF 0x00004000 +#define VF 0x00008000 +#define WF 0x00010000 +#define XF 0x00020000 +#define CBF 0x00040000 /* nonstandard extension */ +#define CDF 0x00080000 /* nonstandard extension */ +#define CEF 0x00100000 /* nonstandard extension */ +#define CGF 0x00200000 /* nonstandard extension */ +#define CHF 0x00400000 /* nonstandard extension */ +#define CLF 0x00800000 /* nonstandard extension */ +#define CPF 0x01000000 /* nonstandard extension */ +#define CTF 0x02000000 /* nonstandard extension */ +#define CUF 0x04000000 /* nonstandard extension */ +#define CXF 0x08000000 +#define CYF 0x10000000 /* nonstandard extension */ +#define CZF 0x20000000 /* nonstandard extension */ + +/* + * ascii string indexed by bit position above (alter the above and you must + * alter this string) used to tell the user what flags caused us to complain + */ +#define FLGCH "abcdfiklnoprstuvwxBDEGHLPTUXYZ" + +/* + * legal pax operation bit patterns + */ + +#define ISLIST(x) (((x) & (RF|WF)) == 0) +#define ISEXTRACT(x) (((x) & (RF|WF)) == RF) +#define ISARCHIVE(x) (((x) & (AF|RF|WF)) == WF) +#define ISAPPND(x) (((x) & (AF|RF|WF)) == (AF|WF)) +#define ISCOPY(x) (((x) & (RF|WF)) == (RF|WF)) +#define ISWRITE(x) (((x) & (RF|WF)) == WF) + +/* + * Illegal option flag subsets based on pax operation + */ + +#define BDEXTR (AF|BF|LF|TF|WF|XF|CBF|CHF|CLF|CPF|CXF) +#define BDARCH (CF|KF|LF|NF|PF|RF|CDF|CEF|CYF|CZF) +#define BDCOPY (AF|BF|FF|OF|XF|CBF|CEF) +#define BDLIST (AF|BF|IF|KF|LF|OF|PF|RF|TF|UF|WF|XF|CBF|CDF|CHF|CLF|CPF|CXF|CYF|CZF) diff --git a/commands/pax/pat_rep.c b/commands/pax/pat_rep.c new file mode 100644 index 000000000..31ee7a44a --- /dev/null +++ b/commands/pax/pat_rep.c @@ -0,0 +1,1128 @@ +/*- + * Copyright (c) 1992 Keith Muller. + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Keith Muller of the University of California, San Diego. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef lint +#if 0 +static char sccsid[] = "@(#)pat_rep.c 8.2 (Berkeley) 4/18/94"; +#endif +#endif /* not lint */ + +#include +#include +#include +#include +#include +#include +#include +#ifdef NET2_REGEX +#include +#else +#include +#endif +#include "pax.h" +#include "pat_rep.h" +#include "extern.h" + +/* + * routines to handle pattern matching, name modification (regular expression + * substitution and interactive renames), and destination name modification for + * copy (-rw). Both file name and link names are adjusted as required in these + * routines. + */ + +#define MAXSUBEXP 10 /* max subexpressions, DO NOT CHANGE */ +static PATTERN *pathead = NULL; /* file pattern match list head */ +static PATTERN *pattail = NULL; /* file pattern match list tail */ +static REPLACE *rephead = NULL; /* replacement string list head */ +static REPLACE *reptail = NULL; /* replacement string list tail */ + +static int rep_name(char *, int *, int); +static int tty_rename(ARCHD *); +static int fix_path(char *, int *, char *, int); +static int fn_match(char *, char *, char **); +static char * range_match(char *, int); +#ifdef NET2_REGEX +static int resub(regexp *, char *, char *, char *); +#else +static int resub(regex_t *, regmatch_t *, char *, char *, char *); +#endif + +/* + * rep_add() + * parses the -s replacement string; compiles the regular expression + * and stores the compiled value and it's replacement string together in + * replacement string list. Input to this function is of the form: + * /old/new/pg + * The first char in the string specifies the delimiter used by this + * replacement string. "Old" is a regular expression in "ed" format which + * is compiled by regcomp() and is applied to filenames. "new" is the + * substitution string; p and g are options flags for printing and global + * replacement (over the single filename) + * Return: + * 0 if a proper replacement string and regular expression was added to + * the list of replacement patterns; -1 otherwise. + */ + +int +rep_add(char *str) +{ + char *pt1; + char *pt2; + REPLACE *rep; +# ifndef NET2_REGEX + int res; + char rebuf[BUFSIZ]; +# endif + + /* + * throw out the bad parameters + */ + if ((str == NULL) || (*str == '\0')) { + paxwarn(1, "Empty replacement string"); + return(-1); + } + + /* + * first character in the string specifies what the delimiter is for + * this expression + */ + if ((pt1 = strchr(str+1, *str)) == NULL) { + paxwarn(1, "Invalid replacement string %s", str); + return(-1); + } + + /* + * allocate space for the node that handles this replacement pattern + * and split out the regular expression and try to compile it + */ + if ((rep = (REPLACE *)malloc(sizeof(REPLACE))) == NULL) { + paxwarn(1, "Unable to allocate memory for replacement string"); + return(-1); + } + + *pt1 = '\0'; +# ifdef NET2_REGEX + if ((rep->rcmp = regcomp(str+1)) == NULL) { +# else + if ((res = regcomp(&(rep->rcmp), str+1, 0)) != 0) { + regerror(res, &(rep->rcmp), rebuf, sizeof(rebuf)); + paxwarn(1, "%s while compiling regular expression %s", rebuf, str); +# endif + (void)free((char *)rep); + return(-1); + } + + /* + * put the delimiter back in case we need an error message and + * locate the delimiter at the end of the replacement string + * we then point the node at the new substitution string + */ + *pt1++ = *str; + if ((pt2 = strchr(pt1, *str)) == NULL) { +# ifdef NET2_REGEX + (void)free((char *)rep->rcmp); +# else + regfree(&(rep->rcmp)); +# endif + (void)free((char *)rep); + paxwarn(1, "Invalid replacement string %s", str); + return(-1); + } + + *pt2 = '\0'; + rep->nstr = pt1; + pt1 = pt2++; + rep->flgs = 0; + + /* + * set the options if any + */ + while (*pt2 != '\0') { + switch(*pt2) { + case 'g': + case 'G': + rep->flgs |= GLOB; + break; + case 'p': + case 'P': + rep->flgs |= PRNT; + break; + default: +# ifdef NET2_REGEX + (void)free((char *)rep->rcmp); +# else + regfree(&(rep->rcmp)); +# endif + (void)free((char *)rep); + *pt1 = *str; + paxwarn(1, "Invalid replacement string option %s", str); + return(-1); + } + ++pt2; + } + + /* + * all done, link it in at the end + */ + rep->fow = NULL; + if (rephead == NULL) { + reptail = rephead = rep; + return(0); + } + reptail->fow = rep; + reptail = rep; + return(0); +} + +/* + * pat_add() + * add a pattern match to the pattern match list. Pattern matches are used + * to select which archive members are extracted. (They appear as + * arguments to pax in the list and read modes). If no patterns are + * supplied to pax, all members in the archive will be selected (and the + * pattern match list is empty). + * Return: + * 0 if the pattern was added to the list, -1 otherwise + */ + +int +pat_add(char *str, char *chdnam) +{ + PATTERN *pt; + + /* + * throw out the junk + */ + if ((str == NULL) || (*str == '\0')) { + paxwarn(1, "Empty pattern string"); + return(-1); + } + + /* + * allocate space for the pattern and store the pattern. the pattern is + * part of argv so do not bother to copy it, just point at it. Add the + * node to the end of the pattern list + */ + if ((pt = (PATTERN *)malloc(sizeof(PATTERN))) == NULL) { + paxwarn(1, "Unable to allocate memory for pattern string"); + return(-1); + } + + pt->pstr = str; + pt->pend = NULL; + pt->plen = strlen(str); + pt->fow = NULL; + pt->flgs = 0; + pt->chdname = chdnam; + + if (pathead == NULL) { + pattail = pathead = pt; + return(0); + } + pattail->fow = pt; + pattail = pt; + return(0); +} + +/* + * pat_chk() + * complain if any the user supplied pattern did not result in a match to + * a selected archive member. + */ + +void +pat_chk(void) +{ + PATTERN *pt; + int wban = 0; + + /* + * walk down the list checking the flags to make sure MTCH was set, + * if not complain + */ + for (pt = pathead; pt != NULL; pt = pt->fow) { + if (pt->flgs & MTCH) + continue; + if (!wban) { + paxwarn(1, "WARNING! These patterns were not matched:"); + ++wban; + } + (void)fprintf(stderr, "%s\n", pt->pstr); + } +} + +/* + * pat_sel() + * the archive member which matches a pattern was selected. Mark the + * pattern as having selected an archive member. arcn->pat points at the + * pattern that was matched. arcn->pat is set in pat_match() + * + * NOTE: When the -c option is used, we are called when there was no match + * by pat_match() (that means we did match before the inverted sense of + * the logic). Now this seems really strange at first, but with -c we + * need to keep track of those patterns that cause an archive member to NOT + * be selected (it found an archive member with a specified pattern) + * Return: + * 0 if the pattern pointed at by arcn->pat was tagged as creating a + * match, -1 otherwise. + */ + +int +pat_sel(ARCHD *arcn) +{ + PATTERN *pt; + PATTERN **ppt; + int len; + + /* + * if no patterns just return + */ + if ((pathead == NULL) || ((pt = arcn->pat) == NULL)) + return(0); + + /* + * when we are NOT limited to a single match per pattern mark the + * pattern and return + */ + if (!nflag) { + pt->flgs |= MTCH; + return(0); + } + + /* + * we reach this point only when we allow a single selected match per + * pattern, if the pattern matches a directory and we do not have -d + * (dflag) we are done with this pattern. We may also be handed a file + * in the subtree of a directory. in that case when we are operating + * with -d, this pattern was already selected and we are done + */ + if (pt->flgs & DIR_MTCH) + return(0); + + if (!dflag && ((pt->pend != NULL) || (arcn->type == PAX_DIR))) { + /* + * ok we matched a directory and we are allowing + * subtree matches but because of the -n only its children will + * match. This is tagged as a DIR_MTCH type. + * WATCH IT, the code assumes that pt->pend points + * into arcn->name and arcn->name has not been modified. + * If not we will have a big mess. Yup this is another kludge + */ + + /* + * if this was a prefix match, remove trailing part of path + * so we can copy it. Future matches will be exact prefix match + */ + if (pt->pend != NULL) + *pt->pend = '\0'; + + if ((pt->pstr = strdup(arcn->name)) == NULL) { + paxwarn(1, "Pattern select out of memory"); + if (pt->pend != NULL) + *pt->pend = '/'; + pt->pend = NULL; + return(-1); + } + + /* + * put the trailing / back in the source string + */ + if (pt->pend != NULL) { + *pt->pend = '/'; + pt->pend = NULL; + } + pt->plen = strlen(pt->pstr); + + /* + * strip off any trailing /, this should really never happen + */ + len = pt->plen - 1; + if (*(pt->pstr + len) == '/') { + *(pt->pstr + len) = '\0'; + pt->plen = len; + } + pt->flgs = DIR_MTCH | MTCH; + arcn->pat = pt; + return(0); + } + + /* + * we are then done with this pattern, so we delete it from the list + * because it can never be used for another match. + * Seems kind of strange to do for a -c, but the pax spec is really + * vague on the interaction of -c -n and -d. We assume that when -c + * and the pattern rejects a member (i.e. it matched it) it is done. + * In effect we place the order of the flags as having -c last. + */ + pt = pathead; + ppt = &pathead; + while ((pt != NULL) && (pt != arcn->pat)) { + ppt = &(pt->fow); + pt = pt->fow; + } + + if (pt == NULL) { + /* + * should never happen.... + */ + paxwarn(1, "Pattern list inconsistant"); + return(-1); + } + *ppt = pt->fow; + (void)free((char *)pt); + arcn->pat = NULL; + return(0); +} + +/* + * pat_match() + * see if this archive member matches any supplied pattern, if a match + * is found, arcn->pat is set to point at the potential pattern. Later if + * this archive member is "selected" we process and mark the pattern as + * one which matched a selected archive member (see pat_sel()) + * Return: + * 0 if this archive member should be processed, 1 if it should be + * skipped and -1 if we are done with all patterns (and pax should quit + * looking for more members) + */ + +int +pat_match(ARCHD *arcn) +{ + PATTERN *pt; + + arcn->pat = NULL; + + /* + * if there are no more patterns and we have -n (and not -c) we are + * done. otherwise with no patterns to match, matches all + */ + if (pathead == NULL) { + if (nflag && !cflag) + return(-1); + return(0); + } + + /* + * have to search down the list one at a time looking for a match. + */ + pt = pathead; + while (pt != NULL) { + /* + * check for a file name match unless we have DIR_MTCH set in + * this pattern then we want a prefix match + */ + if (pt->flgs & DIR_MTCH) { + /* + * this pattern was matched before to a directory + * as we must have -n set for this (but not -d). We can + * only match CHILDREN of that directory so we must use + * an exact prefix match (no wildcards). + */ + if ((arcn->name[pt->plen] == '/') && + (strncmp(pt->pstr, arcn->name, pt->plen) == 0)) + break; + } else if (fn_match(pt->pstr, arcn->name, &pt->pend) == 0) + break; + pt = pt->fow; + } + + /* + * return the result, remember that cflag (-c) inverts the sense of a + * match + */ + if (pt == NULL) + return(cflag ? 0 : 1); + + /* + * We had a match, now when we invert the sense (-c) we reject this + * member. However we have to tag the pattern a being successful, (in a + * match, not in selecting an archive member) so we call pat_sel() here. + */ + arcn->pat = pt; + if (!cflag) + return(0); + + if (pat_sel(arcn) < 0) + return(-1); + arcn->pat = NULL; + return(1); +} + +/* + * fn_match() + * Return: + * 0 if this archive member should be processed, 1 if it should be + * skipped and -1 if we are done with all patterns (and pax should quit + * looking for more members) + * Note: *pend may be changed to show where the prefix ends. + */ + +static int +fn_match(char *pattern, char *string, char **pend) +{ + char c; + char test; + + *pend = NULL; + for (;;) { + switch (c = *pattern++) { + case '\0': + /* + * Ok we found an exact match + */ + if (*string == '\0') + return(0); + + /* + * Check if it is a prefix match + */ + if ((dflag == 1) || (*string != '/')) + return(-1); + + /* + * It is a prefix match, remember where the trailing + * / is located + */ + *pend = string; + return(0); + case '?': + if ((test = *string++) == '\0') + return (-1); + break; + case '*': + c = *pattern; + /* + * Collapse multiple *'s. + */ + while (c == '*') + c = *++pattern; + + /* + * Optimized hack for pattern with a * at the end + */ + if (c == '\0') + return (0); + + /* + * General case, use recursion. + */ + while ((test = *string) != '\0') { + if (!fn_match(pattern, string, pend)) + return (0); + ++string; + } + return (-1); + case '[': + /* + * range match + */ + if (((test = *string++) == '\0') || + ((pattern = range_match(pattern, test)) == NULL)) + return (-1); + break; + case '\\': + default: + if (c != *string++) + return (-1); + break; + } + } + /* NOTREACHED */ +} + +static char * +range_match(char *pattern, int test) +{ + char c; + char c2; + int negate; + int ok = 0; + + if ((negate = (*pattern == '!')) != 0) + ++pattern; + + while ((c = *pattern++) != ']') { + /* + * Illegal pattern + */ + if (c == '\0') + return (NULL); + + if ((*pattern == '-') && ((c2 = pattern[1]) != '\0') && + (c2 != ']')) { + if ((c <= test) && (test <= c2)) + ok = 1; + pattern += 2; + } else if (c == test) + ok = 1; + } + return (ok == negate ? NULL : pattern); +} + +/* + * mod_name() + * modify a selected file name. first attempt to apply replacement string + * expressions, then apply interactive file rename. We apply replacement + * string expressions to both filenames and file links (if we didn't the + * links would point to the wrong place, and we could never be able to + * move an archive that has a file link in it). When we rename files + * interactively, we store that mapping (old name to user input name) so + * if we spot any file links to the old file name in the future, we will + * know exactly how to fix the file link. + * Return: + * 0 continue to process file, 1 skip this file, -1 pax is finished + */ + +int +mod_name(ARCHD *arcn) +{ + int res = 0; + + /* + * Strip off leading '/' if appropriate. + * Currently, this option is only set for the tar format. + */ + if (rmleadslash && arcn->name[0] == '/') { + if (arcn->name[1] == '\0') { + arcn->name[0] = '.'; + } else { + (void)memmove(arcn->name, &arcn->name[1], + strlen(arcn->name)); + arcn->nlen--; + } + if (rmleadslash < 2) { + rmleadslash = 2; + paxwarn(0, "Removing leading / from absolute path names in the archive"); + } + } + if (rmleadslash && arcn->ln_name[0] == '/' && + (arcn->type == PAX_HLK || arcn->type == PAX_HRG)) { + if (arcn->ln_name[1] == '\0') { + arcn->ln_name[0] = '.'; + } else { + (void)memmove(arcn->ln_name, &arcn->ln_name[1], + strlen(arcn->ln_name)); + arcn->ln_nlen--; + } + if (rmleadslash < 2) { + rmleadslash = 2; + paxwarn(0, "Removing leading / from absolute path names in the archive"); + } + } + + /* + * IMPORTANT: We have a problem. what do we do with symlinks? + * Modifying a hard link name makes sense, as we know the file it + * points at should have been seen already in the archive (and if it + * wasn't seen because of a read error or a bad archive, we lose + * anyway). But there are no such requirements for symlinks. On one + * hand the symlink that refers to a file in the archive will have to + * be modified to so it will still work at its new location in the + * file system. On the other hand a symlink that points elsewhere (and + * should continue to do so) should not be modified. There is clearly + * no perfect solution here. So we handle them like hardlinks. Clearly + * a replacement made by the interactive rename mapping is very likely + * to be correct since it applies to a single file and is an exact + * match. The regular expression replacements are a little harder to + * justify though. We claim that the symlink name is only likely + * to be replaced when it points within the file tree being moved and + * in that case it should be modified. what we really need to do is to + * call an oracle here. :) + */ + if (rephead != NULL) { + /* + * we have replacement strings, modify the name and the link + * name if any. + */ + if ((res = rep_name(arcn->name, &(arcn->nlen), 1)) != 0) + return(res); + + if (((arcn->type == PAX_SLK) || (arcn->type == PAX_HLK) || + (arcn->type == PAX_HRG)) && + ((res = rep_name(arcn->ln_name, &(arcn->ln_nlen), 0)) != 0)) + return(res); + } + + if (iflag) { + /* + * perform interactive file rename, then map the link if any + */ + if ((res = tty_rename(arcn)) != 0) + return(res); + if ((arcn->type == PAX_SLK) || (arcn->type == PAX_HLK) || + (arcn->type == PAX_HRG)) + sub_name(arcn->ln_name, &(arcn->ln_nlen), sizeof(arcn->ln_name)); + } + return(res); +} + +/* + * tty_rename() + * Prompt the user for a replacement file name. A "." keeps the old name, + * a empty line skips the file, and an EOF on reading the tty, will cause + * pax to stop processing and exit. Otherwise the file name input, replaces + * the old one. + * Return: + * 0 process this file, 1 skip this file, -1 we need to exit pax + */ + +static int +tty_rename(ARCHD *arcn) +{ + char tmpname[PAXPATHLEN+2]; + int res; + + /* + * prompt user for the replacement name for a file, keep trying until + * we get some reasonable input. Archives may have more than one file + * on them with the same name (from updates etc). We print verbose info + * on the file so the user knows what is up. + */ + tty_prnt("\nATTENTION: %s interactive file rename operation.\n", argv0); + + for (;;) { + ls_tty(arcn); + tty_prnt("Input new name, or a \".\" to keep the old name, "); + tty_prnt("or a \"return\" to skip this file.\n"); + tty_prnt("Input > "); + if (tty_read(tmpname, sizeof(tmpname)) < 0) + return(-1); + if (strcmp(tmpname, "..") == 0) { + tty_prnt("Try again, illegal file name: ..\n"); + continue; + } + if (strlen(tmpname) > PAXPATHLEN) { + tty_prnt("Try again, file name too long\n"); + continue; + } + break; + } + + /* + * empty file name, skips this file. a "." leaves it alone + */ + if (tmpname[0] == '\0') { + tty_prnt("Skipping file.\n"); + return(1); + } + if ((tmpname[0] == '.') && (tmpname[1] == '\0')) { + tty_prnt("Processing continues, name unchanged.\n"); + return(0); + } + + /* + * ok the name changed. We may run into links that point at this + * file later. we have to remember where the user sent the file + * in order to repair any links. + */ + tty_prnt("Processing continues, name changed to: %s\n", tmpname); + res = add_name(arcn->name, arcn->nlen, tmpname); + arcn->nlen = l_strncpy(arcn->name, tmpname, sizeof(arcn->name) - 1); + arcn->name[arcn->nlen] = '\0'; + if (res < 0) + return(-1); + return(0); +} + +/* + * set_dest() + * fix up the file name and the link name (if any) so this file will land + * in the destination directory (used during copy() -rw). + * Return: + * 0 if ok, -1 if failure (name too long) + */ + +int +set_dest(ARCHD *arcn, char *dest_dir, int dir_len) +{ + if (fix_path(arcn->name, &(arcn->nlen), dest_dir, dir_len) < 0) + return(-1); + + /* + * It is really hard to deal with symlinks here, we cannot be sure + * if the name they point was moved (or will be moved). It is best to + * leave them alone. + */ + if ((arcn->type != PAX_HLK) && (arcn->type != PAX_HRG)) + return(0); + + if (fix_path(arcn->ln_name, &(arcn->ln_nlen), dest_dir, dir_len) < 0) + return(-1); + return(0); +} + +/* + * fix_path + * concatenate dir_name and or_name and store the result in or_name (if + * it fits). This is one ugly function. + * Return: + * 0 if ok, -1 if the final name is too long + */ + +static int +fix_path( char *or_name, int *or_len, char *dir_name, int dir_len) +{ + char *src; + char *dest; + char *start; + int len; + + /* + * we shift the or_name to the right enough to tack in the dir_name + * at the front. We make sure we have enough space for it all before + * we start. since dest always ends in a slash, we skip of or_name + * if it also starts with one. + */ + start = or_name; + src = start + *or_len; + dest = src + dir_len; + if (*start == '/') { + ++start; + --dest; + } + if ((len = dest - or_name) > PAXPATHLEN) { + paxwarn(1, "File name %s/%s, too long", dir_name, start); + return(-1); + } + *or_len = len; + + /* + * enough space, shift + */ + while (src >= start) + *dest-- = *src--; + src = dir_name + dir_len - 1; + + /* + * splice in the destination directory name + */ + while (src >= dir_name) + *dest-- = *src--; + + *(or_name + len) = '\0'; + return(0); +} + +/* + * rep_name() + * walk down the list of replacement strings applying each one in order. + * when we find one with a successful substitution, we modify the name + * as specified. if required, we print the results. if the resulting name + * is empty, we will skip this archive member. We use the regexp(3) + * routines (regexp() ought to win a prize as having the most cryptic + * library function manual page). + * --Parameters-- + * name is the file name we are going to apply the regular expressions to + * (and may be modified) + * nlen is the length of this name (and is modified to hold the length of + * the final string). + * prnt is a flag that says whether to print the final result. + * Return: + * 0 if substitution was successful, 1 if we are to skip the file (the name + * ended up empty) + */ + +static int +rep_name(char *name, int *nlen, int prnt) +{ + REPLACE *pt; + char *inpt; + char *outpt; + char *endpt; + char *rpt; + int found = 0; + int res; +# ifndef NET2_REGEX + regmatch_t pm[MAXSUBEXP]; +# endif + char nname[PAXPATHLEN+1]; /* final result of all replacements */ + char buf1[PAXPATHLEN+1]; /* where we work on the name */ + + /* + * copy the name into buf1, where we will work on it. We need to keep + * the orig string around so we can print out the result of the final + * replacement. We build up the final result in nname. inpt points at + * the string we apply the regular expression to. prnt is used to + * suppress printing when we handle replacements on the link field + * (the user already saw that substitution go by) + */ + pt = rephead; + (void)strcpy(buf1, name); + inpt = buf1; + outpt = nname; + endpt = outpt + PAXPATHLEN; + + /* + * try each replacement string in order + */ + while (pt != NULL) { + do { + /* + * check for a successful substitution, if not go to + * the next pattern, or cleanup if we were global + */ +# ifdef NET2_REGEX + if (regexec(pt->rcmp, inpt) == 0) +# else + if (regexec(&(pt->rcmp), inpt, MAXSUBEXP, pm, 0) != 0) +# endif + break; + + /* + * ok we found one. We have three parts, the prefix + * which did not match, the section that did and the + * tail (that also did not match). Copy the prefix to + * the final output buffer (watching to make sure we + * do not create a string too long). + */ + found = 1; +# ifdef NET2_REGEX + rpt = pt->rcmp->startp[0]; +# else + rpt = inpt + pm[0].rm_so; +# endif + + while ((inpt < rpt) && (outpt < endpt)) + *outpt++ = *inpt++; + if (outpt == endpt) + break; + + /* + * for the second part (which matched the regular + * expression) apply the substitution using the + * replacement string and place it the prefix in the + * final output. If we have problems, skip it. + */ +# ifdef NET2_REGEX + if ((res = resub(pt->rcmp,pt->nstr,outpt,endpt)) < 0) { +# else + if ((res = resub(&(pt->rcmp),pm,pt->nstr,outpt,endpt)) + < 0) { +# endif + if (prnt) + paxwarn(1, "Replacement name error %s", + name); + return(1); + } + outpt += res; + + /* + * we set up to look again starting at the first + * character in the tail (of the input string right + * after the last character matched by the regular + * expression (inpt always points at the first char in + * the string to process). If we are not doing a global + * substitution, we will use inpt to copy the tail to + * the final result. Make sure we do not overrun the + * output buffer + */ +# ifdef NET2_REGEX + inpt = pt->rcmp->endp[0]; +# else + inpt += pm[0].rm_eo - pm[0].rm_so; +# endif + + if ((outpt == endpt) || (*inpt == '\0')) + break; + + /* + * if the user wants global we keep trying to + * substitute until it fails, then we are done. + */ + } while (pt->flgs & GLOB); + + if (found) + break; + + /* + * a successful substitution did NOT occur, try the next one + */ + pt = pt->fow; + } + + if (found) { + /* + * we had a substitution, copy the last tail piece (if there is + * room) to the final result + */ + while ((outpt < endpt) && (*inpt != '\0')) + *outpt++ = *inpt++; + + *outpt = '\0'; + if ((outpt == endpt) && (*inpt != '\0')) { + if (prnt) + paxwarn(1,"Replacement name too long %s >> %s", + name, nname); + return(1); + } + + /* + * inform the user of the result if wanted + */ + if (prnt && (pt->flgs & PRNT)) { + if (*nname == '\0') + (void)fprintf(stderr,"%s >> \n", + name); + else + (void)fprintf(stderr,"%s >> %s\n", name, nname); + } + + /* + * if empty inform the caller this file is to be skipped + * otherwise copy the new name over the orig name and return + */ + if (*nname == '\0') + return(1); + *nlen = l_strncpy(name, nname, PAXPATHLEN + 1); + name[PAXPATHLEN] = '\0'; + } + return(0); +} + +#ifdef NET2_REGEX +/* + * resub() + * apply the replacement to the matched expression. expand out the old + * style ed(1) subexpression expansion. + * Return: + * -1 if error, or the number of characters added to the destination. + */ + +static int +resub(regexp *prog, char *src, char *dest, char *destend) +{ + char *spt; + char *dpt; + char c; + int no; + int len; + + spt = src; + dpt = dest; + while ((dpt < destend) && ((c = *spt++) != '\0')) { + if (c == '&') + no = 0; + else if ((c == '\\') && (*spt >= '0') && (*spt <= '9')) + no = *spt++ - '0'; + else { + if ((c == '\\') && ((*spt == '\\') || (*spt == '&'))) + c = *spt++; + *dpt++ = c; + continue; + } + if ((prog->startp[no] == NULL) || (prog->endp[no] == NULL) || + ((len = prog->endp[no] - prog->startp[no]) <= 0)) + continue; + + /* + * copy the subexpression to the destination. + * fail if we run out of space or the match string is damaged + */ + if (len > (destend - dpt)) + len = destend - dpt; + if (l_strncpy(dpt, prog->startp[no], len) != len) + return(-1); + dpt += len; + } + return(dpt - dest); +} + +#else + +/* + * resub() + * apply the replacement to the matched expression. expand out the old + * style ed(1) subexpression expansion. + * Return: + * -1 if error, or the number of characters added to the destination. + */ + +static int +resub(regex_t *rp, regmatch_t *pm, char *src, char *dest, + char *destend) +{ + char *spt; + char *dpt; + char c; + regmatch_t *pmpt; + int len; + int subexcnt; + + spt = src; + dpt = dest; + subexcnt = rp->re_nsub; + while ((dpt < destend) && ((c = *spt++) != '\0')) { + /* + * see if we just have an ordinary replacement character + * or we refer to a subexpression. + */ + if (c == '&') { + pmpt = pm; + } else if ((c == '\\') && (*spt >= '0') && (*spt <= '9')) { + /* + * make sure there is a subexpression as specified + */ + if ((len = *spt++ - '0') > subexcnt) + return(-1); + pmpt = pm + len; + } else { + /* + * Ordinary character, just copy it + */ + if ((c == '\\') && ((*spt == '\\') || (*spt == '&'))) + c = *spt++; + *dpt++ = c; + continue; + } + + /* + * continue if the subexpression is bogus + */ + if ((pmpt->rm_so < 0) || (pmpt->rm_eo < 0) || + ((len = pmpt->rm_eo - pmpt->rm_so) <= 0)) + continue; + + /* + * copy the subexpression to the destination. + * fail if we run out of space or the match string is damaged + */ + if (len > (destend - dpt)) + len = destend - dpt; + if (l_strncpy(dpt, src + pmpt->rm_so, len) != len) + return(-1); + dpt += len; + } + return(dpt - dest); +} +#endif diff --git a/commands/pax/pat_rep.h b/commands/pax/pat_rep.h new file mode 100644 index 000000000..325458315 --- /dev/null +++ b/commands/pax/pat_rep.h @@ -0,0 +1,51 @@ +/*- + * Copyright (c) 1992 Keith Muller. + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Keith Muller of the University of California, San Diego. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)pat_rep.h 8.1 (Berkeley) 5/31/93 + * $FreeBSD: src/bin/pax/pat_rep.h,v 1.6 2004/04/06 20:06:48 markm Exp $ + */ + +/* + * data structure for storing user supplied replacement strings (-s) + */ +typedef struct replace { + char *nstr; /* the new string we will substitute with */ +# ifdef NET2_REGEX + regexp *rcmp; /* compiled regular expression used to match */ +# else + regex_t rcmp; /* compiled regular expression used to match */ +# endif + int flgs; /* print conversions? global in operation? */ +#define PRNT 0x1 +#define GLOB 0x2 + struct replace *fow; /* pointer to next pattern */ +} REPLACE; diff --git a/commands/pax/pax.c b/commands/pax/pax.c new file mode 100644 index 000000000..a687d715f --- /dev/null +++ b/commands/pax/pax.c @@ -0,0 +1,439 @@ +/*- + * Copyright (c) 1992 Keith Muller. + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Keith Muller of the University of California, San Diego. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#if 0 +#ifndef lint +static char const copyright[] = +"@(#) Copyright (c) 1992, 1993\n\ + The Regents of the University of California. All rights reserved.\n"; +#endif /* not lint */ + +#ifndef lint +static char sccsid[] = "@(#)pax.c 8.2 (Berkeley) 4/18/94"; +#endif /* not lint */ +#endif + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "pax.h" +#include "extern.h" +static int gen_init(void); + +/* + * PAX main routines, general globals and some simple start up routines + */ + +/* + * Variables that can be accessed by any routine within pax + */ +int act = DEFOP; /* read/write/append/copy */ +FSUB *frmt = NULL; /* archive format type */ +int cflag; /* match all EXCEPT pattern/file */ +int cwdfd; /* starting cwd */ +int dflag; /* directory member match only */ +int iflag; /* interactive file/archive rename */ +int kflag; /* do not overwrite existing files */ +int lflag; /* use hard links when possible */ +int nflag; /* select first archive member match */ +int tflag; /* restore access time after read */ +int uflag; /* ignore older modification time files */ +int vflag; /* produce verbose output */ +int Dflag; /* same as uflag except inode change time */ +int Hflag; /* follow command line symlinks (write only) */ +int Lflag; /* follow symlinks when writing */ +int Xflag; /* archive files with same device id only */ +int Yflag; /* same as Dflg except after name mode */ +int Zflag; /* same as uflg except after name mode */ +int vfpart; /* is partial verbose output in progress */ +int patime = 1; /* preserve file access time */ +int pmtime = 1; /* preserve file modification times */ +int nodirs; /* do not create directories as needed */ +int pmode; /* preserve file mode bits */ +int pids; /* preserve file uid/gid */ +int rmleadslash = 0; /* remove leading '/' from pathnames */ +int exit_val; /* exit value */ +int docrc; /* check/create file crc */ +char *dirptr; /* destination dir in a copy */ +const char *argv0; /* root of argv[0] */ +sigset_t s_mask; /* signal mask for cleanup critical sect */ +FILE *listf; /* file pointer to print file list to */ +char *tempfile; /* tempfile to use for mkstemp(3) */ +char *tempbase; /* basename of tempfile to use for mkstemp(3) */ + +/* + * PAX - Portable Archive Interchange + * + * A utility to read, write, and write lists of the members of archive + * files and copy directory hierarchies. A variety of archive formats + * are supported (some are described in POSIX 1003.1 10.1): + * + * ustar - 10.1.1 extended tar interchange format + * cpio - 10.1.2 extended cpio interchange format + * tar - old BSD 4.3 tar format + * binary cpio - old cpio with binary header format + * sysVR4 cpio - with and without CRC + * + * This version is a superset of IEEE Std 1003.2b-d3 + * + * Summary of Extensions to the IEEE Standard: + * + * 1 READ ENHANCEMENTS + * 1.1 Operations which read archives will continue to operate even when + * processing archives which may be damaged, truncated, or fail to meet + * format specs in several different ways. Damaged sections of archives + * are detected and avoided if possible. Attempts will be made to resync + * archive read operations even with badly damaged media. + * 1.2 Blocksize requirements are not strictly enforced on archive read. + * Tapes which have variable sized records can be read without errors. + * 1.3 The user can specify via the non-standard option flag -E if error + * resync operation should stop on a media error, try a specified number + * of times to correct, or try to correct forever. + * 1.4 Sparse files (lseek holes) stored on the archive (but stored with blocks + * of all zeros will be restored with holes appropriate for the target + * file system + * 1.5 The user is notified whenever something is found during archive + * read operations which violates spec (but the read will continue). + * 1.6 Multiple archive volumes can be read and may span over different + * archive devices + * 1.7 Rigidly restores all file attributes exactly as they are stored on the + * archive. + * 1.8 Modification change time ranges can be specified via multiple -T + * options. These allow a user to select files whose modification time + * lies within a specific time range. + * 1.9 Files can be selected based on owner (user name or uid) via one or more + * -U options. + * 1.10 Files can be selected based on group (group name or gid) via one o + * more -G options. + * 1.11 File modification time can be checked against existing file after + * name modification (-Z) + * + * 2 WRITE ENHANCEMENTS + * 2.1 Write operation will stop instead of allowing a user to create a flawed + * flawed archive (due to any problem). + * 2.2 Archives written by pax are forced to strictly conform to both the + * archive and pax the specific format specifications. + * 2.3 Blocking size and format is rigidly enforced on writes. + * 2.4 Formats which may exhibit header overflow problems (they have fields + * too small for large file systems, such as inode number storage), use + * routines designed to repair this problem. These techniques still + * conform to both pax and format specifications, but no longer truncate + * these fields. This removes any restrictions on using these archive + * formats on large file systems. + * 2.5 Multiple archive volumes can be written and may span over different + * archive devices + * 2.6 A archive volume record limit allows the user to specify the number + * of bytes stored on an archive volume. When reached the user is + * prompted for the next archive volume. This is specified with the + * non-standard -B flag. The limit is rounded up to the next blocksize. + * 2.7 All archive padding during write use zero filled sections. This makes + * it much easier to pull data out of flawed archive during read + * operations. + * 2.8 Access time reset with the -t applies to all file nodes (including + * directories). + * 2.9 Symbolic links can be followed with -L (optional in the spec). + * 2.10 Modification or inode change time ranges can be specified via + * multiple -T options. These allow a user to select files whose + * modification or inode change time lies within a specific time range. + * 2.11 Files can be selected based on owner (user name or uid) via one or more + * -U options. + * 2.12 Files can be selected based on group (group name or gid) via one o + * more -G options. + * 2.13 Symlinks which appear on the command line can be followed (without + * following other symlinks; -H flag) + * + * 3 COPY ENHANCEMENTS + * 3.1 Sparse files (lseek holes) can be copied without expanding the holes + * into zero filled blocks. The file copy is created with holes which are + * appropriate for the target file system + * 3.2 Access time as well as modification time on copied file trees can be + * preserved with the appropriate -p options. + * 3.3 Access time reset with the -t applies to all file nodes (including + * directories). + * 3.4 Symbolic links can be followed with -L (optional in the spec). + * 3.5 Modification or inode change time ranges can be specified via + * multiple -T options. These allow a user to select files whose + * modification or inode change time lies within a specific time range. + * 3.6 Files can be selected based on owner (user name or uid) via one or more + * -U options. + * 3.7 Files can be selected based on group (group name or gid) via one o + * more -G options. + * 3.8 Symlinks which appear on the command line can be followed (without + * following other symlinks; -H flag) + * 3.9 File inode change time can be checked against existing file before + * name modification (-D) + * 3.10 File inode change time can be checked against existing file after + * name modification (-Y) + * 3.11 File modification time can be checked against existing file after + * name modification (-Z) + * + * 4 GENERAL ENHANCEMENTS + * 4.1 Internal structure is designed to isolate format dependent and + * independent functions. Formats are selected via a format driver table. + * This encourages the addition of new archive formats by only having to + * write those routines which id, read and write the archive header. + */ + +/* + * main() + * parse options, set up and operate as specified by the user. + * any operational flaw will set exit_val to non-zero + * Return: 0 if ok, 1 otherwise + */ + +int +main(int argc, char *argv[]) +{ + const char *tmpdir; + size_t tdlen; + + (void) setlocale(LC_ALL, ""); + listf = stderr; + /* + * Keep a reference to cwd, so we can always come back home. + */ + cwdfd = open(".", O_RDONLY); + if (cwdfd < 0) { + syswarn(0, errno, "Can't open current working directory."); + return(exit_val); + } + + /* + * Where should we put temporary files? + */ + if ((tmpdir = getenv("TMPDIR")) == NULL || *tmpdir == '\0') + tmpdir = _PATH_TMP; + tdlen = strlen(tmpdir); + while(tdlen > 0 && tmpdir[tdlen - 1] == '/') + tdlen--; + tempfile = malloc(tdlen + 1 + sizeof(_TFILE_BASE)); + if (tempfile == NULL) { + paxwarn(1, "Cannot allocate memory for temp file name."); + return(exit_val); + } + if (tdlen) + memcpy(tempfile, tmpdir, tdlen); + tempbase = tempfile + tdlen; + *tempbase++ = '/'; + + /* + * parse options, determine operational mode, general init + */ + options(argc, argv); + if ((gen_init() < 0) || (tty_init() < 0)) + return(exit_val); + + /* + * select a primary operation mode + */ + switch(act) { + case EXTRACT: + extract(); + break; + case ARCHIVE: + archive(); + break; + case APPND: + if (gzip_program != NULL) + err(1, "can not gzip while appending"); + append(); + break; + case COPY: + copy(); + break; + default: + case LIST: + list(); + break; + } + return(exit_val); +} + +/* + * sig_cleanup() + * when interrupted we try to do whatever delayed processing we can. + * This is not critical, but we really ought to limit our damage when we + * are aborted by the user. + * Return: + * never.... + */ + +void +sig_cleanup(int which_sig) +{ + /* + * restore modes and times for any dirs we may have created + * or any dirs we may have read. Set vflag and vfpart so the user + * will clearly see the message on a line by itself. + */ + vflag = vfpart = 1; +#if 0 + /* ignore this under minix */ + if (which_sig == SIGXCPU) + paxwarn(0, "Cpu time limit reached, cleaning up."); + else +#endif + paxwarn(0, "Signal caught, cleaning up."); + + ar_close(); + proc_dir(); + if (tflag) + atdir_end(); + exit(1); +} + +/* + * gen_init() + * general setup routines. Not all are required, but they really help + * when dealing with a medium to large sized archives. + */ + +static int +gen_init(void) +{ +#if 0 + struct rlimit reslimit; +#endif + struct sigaction n_hand; + struct sigaction o_hand; + +#if 0 + /* + * Really needed to handle large archives. We can run out of memory for + * internal tables really fast when we have a whole lot of files... + */ + if (getrlimit(RLIMIT_DATA , &reslimit) == 0){ + reslimit.rlim_cur = reslimit.rlim_max; + (void)setrlimit(RLIMIT_DATA , &reslimit); + } + + /* + * should file size limits be waived? if the os limits us, this is + * needed if we want to write a large archive + */ + if (getrlimit(RLIMIT_FSIZE , &reslimit) == 0){ + reslimit.rlim_cur = reslimit.rlim_max; + (void)setrlimit(RLIMIT_FSIZE , &reslimit); + } + + /* + * increase the size the stack can grow to + */ + if (getrlimit(RLIMIT_STACK , &reslimit) == 0){ + reslimit.rlim_cur = reslimit.rlim_max; + (void)setrlimit(RLIMIT_STACK , &reslimit); + } + + /* + * not really needed, but doesn't hurt + */ + if (getrlimit(RLIMIT_RSS , &reslimit) == 0){ + reslimit.rlim_cur = reslimit.rlim_max; + (void)setrlimit(RLIMIT_RSS , &reslimit); + } +#endif + + /* + * signal handling to reset stored directory times and modes. Since + * we deal with broken pipes via failed writes we ignore it. We also + * deal with any file size limit thorugh failed writes. Cpu time + * limits are caught and a cleanup is forced. + */ + + if ((sigemptyset(&s_mask) < 0) || (sigaddset(&s_mask, SIGTERM) < 0) || + (sigaddset(&s_mask,SIGINT) < 0)||(sigaddset(&s_mask,SIGHUP) < 0) || + (sigaddset(&s_mask,SIGPIPE) < 0)||(sigaddset(&s_mask,SIGQUIT)<0) +#if 0 + || (sigaddset(&s_mask,SIGXCPU) < 0)||(sigaddset(&s_mask,SIGXFSZ)<0) +#endif + ) { + paxwarn(1, "Unable to set up signal mask"); + return(-1); + } + memset(&n_hand, 0, sizeof n_hand); + n_hand.sa_mask = s_mask; + n_hand.sa_flags = 0; + n_hand.sa_handler = sig_cleanup; + + if ((sigaction(SIGHUP, &n_hand, &o_hand) < 0) && + (o_hand.sa_handler == SIG_IGN) && + (sigaction(SIGHUP, &o_hand, &o_hand) < 0)) + goto out; + + if ((sigaction(SIGTERM, &n_hand, &o_hand) < 0) && + (o_hand.sa_handler == SIG_IGN) && + (sigaction(SIGTERM, &o_hand, &o_hand) < 0)) + goto out; + + if ((sigaction(SIGINT, &n_hand, &o_hand) < 0) && + (o_hand.sa_handler == SIG_IGN) && + (sigaction(SIGINT, &o_hand, &o_hand) < 0)) + goto out; + + if ((sigaction(SIGQUIT, &n_hand, &o_hand) < 0) && + (o_hand.sa_handler == SIG_IGN) && + (sigaction(SIGQUIT, &o_hand, &o_hand) < 0)) + goto out; + +#if 0 + if ((sigaction(SIGXCPU, &n_hand, &o_hand) < 0) && + (o_hand.sa_handler == SIG_IGN) && + (sigaction(SIGXCPU, &o_hand, &o_hand) < 0)) + goto out; +#endif + + n_hand.sa_handler = SIG_IGN; + if ((sigaction(SIGPIPE, &n_hand, &o_hand) < 0) +#if 0 + || (sigaction(SIGXFSZ, &n_hand, &o_hand) < 0) +#endif + ) + + + goto out; + return(0); + + out: + syswarn(1, errno, "Unable to set up signal handler"); + return(-1); +} diff --git a/commands/pax/pax.h b/commands/pax/pax.h new file mode 100644 index 000000000..f0b64ac1c --- /dev/null +++ b/commands/pax/pax.h @@ -0,0 +1,251 @@ +/*- + * Copyright (c) 1992 Keith Muller. + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Keith Muller of the University of California, San Diego. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)pax.h 8.2 (Berkeley) 4/18/94 + * $FreeBSD: src/bin/pax/pax.h,v 1.18 2004/04/06 20:06:48 markm Exp $ + */ + + +#include +#include + +/* + * BSD PAX global data structures and constants. + */ + +#define MAXBLK 64512 /* MAX blocksize supported (posix SPEC) */ + /* WARNING: increasing MAXBLK past 32256 */ + /* will violate posix spec. */ +#define MAXBLK_POSIX 32256 /* MAX blocksize supported as per POSIX */ +#define BLKMULT 512 /* blocksize must be even mult of 512 bytes */ + /* Don't even think of changing this */ +#define DEVBLK 8192 /* default read blksize for devices */ +#define FILEBLK 10240 /* default read blksize for files */ +#define PAXPATHLEN 3072 /* maximum path length for pax. MUST be */ + /* longer than the system PATH_MAX */ + +/* + * Pax modes of operation + */ +#define LIST 0 /* List the file in an archive */ +#define EXTRACT 1 /* extract the files in an archive */ +#define ARCHIVE 2 /* write a new archive */ +#define APPND 3 /* append to the end of an archive */ +#define COPY 4 /* copy files to destination dir */ +#define DEFOP LIST /* if no flags default is to LIST */ + +/* + * Device type of the current archive volume + */ +#define ISREG 0 /* regular file */ +#define ISCHR 1 /* character device */ +#define ISBLK 2 /* block device */ +#define ISTAPE 3 /* tape drive */ +#define ISPIPE 4 /* pipe/socket */ + +typedef struct archd ARCHD; +typedef struct fsub FSUB; +typedef struct oplist OPLIST; +typedef struct pattern PATTERN; + +/* + * Format Specific Routine Table + * + * The format specific routine table allows new archive formats to be quickly + * added. Overall pax operation is independent of the actual format used to + * form the archive. Only those routines which deal directly with the archive + * are tailored to the oddities of the specific format. All other routines are + * independent of the archive format. Data flow in and out of the format + * dependent routines pass pointers to ARCHD structure (described below). + */ +struct fsub { + const char *name; /* name of format, this is the name the user */ + /* gives to -x option to select it. */ + int bsz; /* default block size. used when the user */ + /* does not specify a blocksize for writing */ + /* Appends continue to with the blocksize */ + /* the archive is currently using. */ + int hsz; /* Header size in bytes. this is the size of */ + /* the smallest header this format supports. */ + /* Headers are assumed to fit in a BLKMULT. */ + /* If they are bigger, get_head() and */ + /* get_arc() must be adjusted */ + int udev; /* does append require unique dev/ino? some */ + /* formats use the device and inode fields */ + /* to specify hard links. when members in */ + /* the archive have the same inode/dev they */ + /* are assumed to be hard links. During */ + /* append we may have to generate unique ids */ + /* to avoid creating incorrect hard links */ + int hlk; /* does archive store hard links info? if */ + /* not, we do not bother to look for them */ + /* during archive write operations */ + int blkalgn; /* writes must be aligned to blkalgn boundary */ + int inhead; /* is the trailer encoded in a valid header? */ + /* if not, trailers are assumed to be found */ + /* in invalid headers (i.e like tar) */ + int (*id)(char *, int); /* checks if a buffer is a valid header */ + /* returns 1 if it is, o.w. returns a 0 */ + int (*st_rd)(void); /* initialize routine for read. so format */ + /* can set up tables etc before it starts */ + /* reading an archive */ + int (*rd)(ARCHD *, char *); + /* read header routine. passed a pointer to */ + /* ARCHD. It must extract the info from the */ + /* format and store it in the ARCHD struct. */ + /* This routine is expected to fill all the */ + /* fields in the ARCHD (including stat buf) */ + /* 0 is returned when a valid header is */ + /* found. -1 when not valid. This routine */ + /* set the skip and pad fields so the format */ + /* independent routines know the amount of */ + /* padding and the number of bytes of data */ + /* which follow the header. This info is */ + /* used skip to the next file header */ + off_t (*end_rd)(void); /* read cleanup. Allows format to clean up */ + /* and MUST RETURN THE LENGTH OF THE TRAILER */ + /* RECORD (so append knows how many bytes */ + /* to move back to rewrite the trailer) */ + int (*st_wr)(void); /* initialize routine for write operations */ + int (*wr)(ARCHD *); /* write archive header. Passed an ARCHD */ + /* filled with the specs on the next file to */ + /* archived. Returns a 1 if no file data is */ + /* is to be stored; 0 if file data is to be */ + /* added. A -1 is returned if a write */ + /* operation to the archive failed. this */ + /* function sets the skip and pad fields so */ + /* the proper padding can be added after */ + /* file data. This routine must NEVER write */ + /* a flawed archive header. */ + int (*end_wr)(void); /* end write. write the trailer and do any */ + /* other format specific functions needed */ + /* at the end of an archive write */ + int (*trail_cpio)(ARCHD *); + int (*trail_tar)(char *, int, int *); + /* returns 0 if a valid trailer, -1 if not */ + /* For formats which encode the trailer */ + /* outside of a valid header, a return value */ + /* of 1 indicates that the block passed to */ + /* it can never contain a valid header (skip */ + /* this block, no point in looking at it) */ + int (*rd_data)(ARCHD *, int, off_t *); + /* read/process file data from the archive */ + int (*wr_data)(ARCHD *, int, off_t *); + /* write/process file data to the archive */ + int (*options)(void); /* process format specific options (-o) */ +}; + +/* + * Pattern matching structure + * + * Used to store command line patterns + */ +struct pattern { + char *pstr; /* pattern to match, user supplied */ + char *pend; /* end of a prefix match */ + char *chdname; /* the dir to change to if not NULL. */ + int plen; /* length of pstr */ + int flgs; /* processing/state flags */ +#define MTCH 0x1 /* pattern has been matched */ +#define DIR_MTCH 0x2 /* pattern matched a directory */ + struct pattern *fow; /* next pattern */ +}; + +/* + * General Archive Structure (used internal to pax) + * + * This structure is used to pass information about archive members between + * the format independent routines and the format specific routines. When + * new archive formats are added, they must accept requests and supply info + * encoded in a structure of this type. The name fields are declared statically + * here, as there is only ONE of these floating around, size is not a major + * consideration. Eventually converting the name fields to a dynamic length + * may be required if and when the supporting operating system removes all + * restrictions on the length of pathnames it will resolve. + */ +struct archd { + int nlen; /* file name length */ + char name[PAXPATHLEN+1]; /* file name */ + int ln_nlen; /* link name length */ + char ln_name[PAXPATHLEN+1]; /* name to link to (if any) */ + char *org_name; /* orig name in file system */ + PATTERN *pat; /* ptr to pattern match (if any) */ + struct stat sb; /* stat buffer see stat(2) */ + off_t pad; /* bytes of padding after file xfer */ + off_t skip; /* bytes of real data after header */ + /* IMPORTANT. The st_size field does */ + /* not always indicate the amount of */ + /* data following the header. */ + u_long crc; /* file crc */ + int type; /* type of file node */ +#define PAX_DIR 1 /* directory */ +#define PAX_CHR 2 /* character device */ +#define PAX_BLK 3 /* block device */ +#define PAX_REG 4 /* regular file */ +#define PAX_SLK 5 /* symbolic link */ +#define PAX_SCK 6 /* socket */ +#define PAX_FIF 7 /* fifo */ +#define PAX_HLK 8 /* hard link */ +#define PAX_HRG 9 /* hard link to a regular file */ +#define PAX_CTG 10 /* high performance file */ +}; + +/* + * Format Specific Options List + * + * Used to pass format options to the format options handler + */ +struct oplist { + char *name; /* option variable name e.g. name= */ + char *value; /* value for option variable */ + struct oplist *fow; /* next option */ +}; + +/* + * General Macros + */ +#ifndef MIN +#define MIN(a,b) (((a)<(b))?(a):(b)) +#endif +#define TODEV(x, y) makedev((x), (y)) + +/* + * General Defines + */ +#define HEX 16 +#define OCT 8 +#define _PAX_ 1 +#define _TFILE_BASE "paxXXXXXXXXXX" + +#define err(c, str) { perror(str); exit(c); } +#define setpassent(a) setpwent() +#define setgroupent(a) setgrent() diff --git a/commands/pax/sel_subs.c b/commands/pax/sel_subs.c new file mode 100644 index 000000000..3746a00b5 --- /dev/null +++ b/commands/pax/sel_subs.c @@ -0,0 +1,605 @@ +/*- + * Copyright (c) 1992 Keith Muller. + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Keith Muller of the University of California, San Diego. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef lint +#if 0 +static char sccsid[] = "@(#)sel_subs.c 8.1 (Berkeley) 5/31/93"; +#endif +#endif /* not lint */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "pax.h" +#include "sel_subs.h" +#include "extern.h" + +static int str_sec(char *, time_t *); +static int usr_match(ARCHD *); +static int grp_match(ARCHD *); +static int trng_match(ARCHD *); + +static TIME_RNG *trhead = NULL; /* time range list head */ +static TIME_RNG *trtail = NULL; /* time range list tail */ +static USRT **usrtb = NULL; /* user selection table */ +static GRPT **grptb = NULL; /* group selection table */ + +/* + * Routines for selection of archive members + */ + +/* + * sel_chk() + * check if this file matches a specified uid, gid or time range + * Return: + * 0 if this archive member should be processed, 1 if it should be skipped + */ + +int +sel_chk(ARCHD *arcn) +{ + if (((usrtb != NULL) && usr_match(arcn)) || + ((grptb != NULL) && grp_match(arcn)) || + ((trhead != NULL) && trng_match(arcn))) + return(1); + return(0); +} + +/* + * User/group selection routines + * + * Routines to handle user selection of files based on the file uid/gid. To + * add an entry, the user supplies either then name or the uid/gid starting with + * a # on the command line. A \# will escape the #. + */ + +/* + * usr_add() + * add a user match to the user match hash table + * Return: + * 0 if added ok, -1 otherwise; + */ + +int +usr_add(char *str) +{ + u_int indx; + USRT *pt; + struct passwd *pw; + uid_t uid; + + /* + * create the table if it doesn't exist + */ + if ((str == NULL) || (*str == '\0')) + return(-1); + if ((usrtb == NULL) && + ((usrtb = (USRT **)calloc(USR_TB_SZ, sizeof(USRT *))) == NULL)) { + paxwarn(1, "Unable to allocate memory for user selection table"); + return(-1); + } + + /* + * figure out user spec + */ + if (str[0] != '#') { + /* + * it is a user name, \# escapes # as first char in user name + */ + if ((str[0] == '\\') && (str[1] == '#')) + ++str; + if ((pw = getpwnam(str)) == NULL) { + paxwarn(1, "Unable to find uid for user: %s", str); + return(-1); + } + uid = (uid_t)pw->pw_uid; + } else +# ifdef NET2_STAT + uid = (uid_t)atoi(str+1); +# else + uid = (uid_t)strtoul(str+1, NULL, 10); +# endif + endpwent(); + + /* + * hash it and go down the hash chain (if any) looking for it + */ + indx = ((unsigned)uid) % USR_TB_SZ; + if ((pt = usrtb[indx]) != NULL) { + while (pt != NULL) { + if (pt->uid == uid) + return(0); + pt = pt->fow; + } + } + + /* + * uid is not yet in the table, add it to the front of the chain + */ + if ((pt = (USRT *)malloc(sizeof(USRT))) != NULL) { + pt->uid = uid; + pt->fow = usrtb[indx]; + usrtb[indx] = pt; + return(0); + } + paxwarn(1, "User selection table out of memory"); + return(-1); +} + +/* + * usr_match() + * check if this files uid matches a selected uid. + * Return: + * 0 if this archive member should be processed, 1 if it should be skipped + */ + +static int +usr_match(ARCHD *arcn) +{ + USRT *pt; + + /* + * hash and look for it in the table + */ + pt = usrtb[((unsigned)arcn->sb.st_uid) % USR_TB_SZ]; + while (pt != NULL) { + if (pt->uid == arcn->sb.st_uid) + return(0); + pt = pt->fow; + } + + /* + * not found + */ + return(1); +} + +/* + * grp_add() + * add a group match to the group match hash table + * Return: + * 0 if added ok, -1 otherwise; + */ + +int +grp_add(char *str) +{ + u_int indx; + GRPT *pt; + struct group *gr; + gid_t gid; + + /* + * create the table if it doesn't exist + */ + if ((str == NULL) || (*str == '\0')) + return(-1); + if ((grptb == NULL) && + ((grptb = (GRPT **)calloc(GRP_TB_SZ, sizeof(GRPT *))) == NULL)) { + paxwarn(1, "Unable to allocate memory fo group selection table"); + return(-1); + } + + /* + * figure out user spec + */ + if (str[0] != '#') { + /* + * it is a group name, \# escapes # as first char in group name + */ + if ((str[0] == '\\') && (str[1] == '#')) + ++str; + if ((gr = getgrnam(str)) == NULL) { + paxwarn(1,"Cannot determine gid for group name: %s", str); + return(-1); + } + gid = gr->gr_gid; + } else +# ifdef NET2_STAT + gid = (gid_t)atoi(str+1); +# else + gid = (gid_t)strtoul(str+1, NULL, 10); +# endif + endgrent(); + + /* + * hash it and go down the hash chain (if any) looking for it + */ + indx = ((unsigned)gid) % GRP_TB_SZ; + if ((pt = grptb[indx]) != NULL) { + while (pt != NULL) { + if (pt->gid == gid) + return(0); + pt = pt->fow; + } + } + + /* + * gid not in the table, add it to the front of the chain + */ + if ((pt = (GRPT *)malloc(sizeof(GRPT))) != NULL) { + pt->gid = gid; + pt->fow = grptb[indx]; + grptb[indx] = pt; + return(0); + } + paxwarn(1, "Group selection table out of memory"); + return(-1); +} + +/* + * grp_match() + * check if this files gid matches a selected gid. + * Return: + * 0 if this archive member should be processed, 1 if it should be skipped + */ + +static int +grp_match(ARCHD *arcn) +{ + GRPT *pt; + + /* + * hash and look for it in the table + */ + pt = grptb[((unsigned)arcn->sb.st_gid) % GRP_TB_SZ]; + while (pt != NULL) { + if (pt->gid == arcn->sb.st_gid) + return(0); + pt = pt->fow; + } + + /* + * not found + */ + return(1); +} + +/* + * Time range selection routines + * + * Routines to handle user selection of files based on the modification and/or + * inode change time falling within a specified time range (the non-standard + * -T flag). The user may specify any number of different file time ranges. + * Time ranges are checked one at a time until a match is found (if at all). + * If the file has a mtime (and/or ctime) which lies within one of the time + * ranges, the file is selected. Time ranges may have a lower and/or an upper + * value. These ranges are inclusive. When no time ranges are supplied to pax + * with the -T option, all members in the archive will be selected by the time + * range routines. When only a lower range is supplied, only files with a + * mtime (and/or ctime) equal to or younger are selected. When only an upper + * range is supplied, only files with a mtime (and/or ctime) equal to or older + * are selected. When the lower time range is equal to the upper time range, + * only files with a mtime (or ctime) of exactly that time are selected. + */ + +/* + * trng_add() + * add a time range match to the time range list. + * This is a non-standard pax option. Lower and upper ranges are in the + * format: [yy[mm[dd[hh]]]]mm[.ss] and are comma separated. + * Time ranges are based on current time, so 1234 would specify a time of + * 12:34 today. + * Return: + * 0 if the time range was added to the list, -1 otherwise + */ + +int +trng_add(char *str) +{ + TIME_RNG *pt; + char *up_pt = NULL; + char *stpt; + char *flgpt; + int dot = 0; + + /* + * throw out the badly formed time ranges + */ + if ((str == NULL) || (*str == '\0')) { + paxwarn(1, "Empty time range string"); + return(-1); + } + + /* + * locate optional flags suffix /{cm}. + */ + if ((flgpt = strrchr(str, '/')) != NULL) + *flgpt++ = '\0'; + + for (stpt = str; *stpt != '\0'; ++stpt) { + if ((*stpt >= '0') && (*stpt <= '9')) + continue; + if ((*stpt == ',') && (up_pt == NULL)) { + *stpt = '\0'; + up_pt = stpt + 1; + dot = 0; + continue; + } + + /* + * allow only one dot per range (secs) + */ + if ((*stpt == '.') && (!dot)) { + ++dot; + continue; + } + paxwarn(1, "Improperly specified time range: %s", str); + goto out; + } + + /* + * allocate space for the time range and store the limits + */ + if ((pt = (TIME_RNG *)malloc(sizeof(TIME_RNG))) == NULL) { + paxwarn(1, "Unable to allocate memory for time range"); + return(-1); + } + + /* + * by default we only will check file mtime, but usee can specify + * mtime, ctime (inode change time) or both. + */ + if ((flgpt == NULL) || (*flgpt == '\0')) + pt->flgs = CMPMTME; + else { + pt->flgs = 0; + while (*flgpt != '\0') { + switch(*flgpt) { + case 'M': + case 'm': + pt->flgs |= CMPMTME; + break; + case 'C': + case 'c': + pt->flgs |= CMPCTME; + break; + default: + paxwarn(1, "Bad option %c with time range %s", + *flgpt, str); + goto out; + } + ++flgpt; + } + } + + /* + * start off with the current time + */ + pt->low_time = pt->high_time = time(NULL); + if (*str != '\0') { + /* + * add lower limit + */ + if (str_sec(str, &(pt->low_time)) < 0) { + paxwarn(1, "Illegal lower time range %s", str); + (void)free((char *)pt); + goto out; + } + pt->flgs |= HASLOW; + } + + if ((up_pt != NULL) && (*up_pt != '\0')) { + /* + * add upper limit + */ + if (str_sec(up_pt, &(pt->high_time)) < 0) { + paxwarn(1, "Illegal upper time range %s", up_pt); + (void)free((char *)pt); + goto out; + } + pt->flgs |= HASHIGH; + + /* + * check that the upper and lower do not overlap + */ + if (pt->flgs & HASLOW) { + if (pt->low_time > pt->high_time) { + paxwarn(1, "Upper %s and lower %s time overlap", + up_pt, str); + (void)free((char *)pt); + return(-1); + } + } + } + + pt->fow = NULL; + if (trhead == NULL) { + trtail = trhead = pt; + return(0); + } + trtail->fow = pt; + trtail = pt; + return(0); + + out: + paxwarn(1, "Time range format is: [yy[mm[dd[hh]]]]mm[.ss][/[c][m]]"); + return(-1); +} + +/* + * trng_match() + * check if this files mtime/ctime falls within any supplied time range. + * Return: + * 0 if this archive member should be processed, 1 if it should be skipped + */ + +static int +trng_match(ARCHD *arcn) +{ + TIME_RNG *pt; + + /* + * have to search down the list one at a time looking for a match. + * remember time range limits are inclusive. + */ + pt = trhead; + while (pt != NULL) { + switch(pt->flgs & CMPBOTH) { + case CMPBOTH: + /* + * user wants both mtime and ctime checked for this + * time range + */ + if (((pt->flgs & HASLOW) && + (arcn->sb.st_mtime < pt->low_time) && + (arcn->sb.st_ctime < pt->low_time)) || + ((pt->flgs & HASHIGH) && + (arcn->sb.st_mtime > pt->high_time) && + (arcn->sb.st_ctime > pt->high_time))) { + pt = pt->fow; + continue; + } + break; + case CMPCTME: + /* + * user wants only ctime checked for this time range + */ + if (((pt->flgs & HASLOW) && + (arcn->sb.st_ctime < pt->low_time)) || + ((pt->flgs & HASHIGH) && + (arcn->sb.st_ctime > pt->high_time))) { + pt = pt->fow; + continue; + } + break; + case CMPMTME: + default: + /* + * user wants only mtime checked for this time range + */ + if (((pt->flgs & HASLOW) && + (arcn->sb.st_mtime < pt->low_time)) || + ((pt->flgs & HASHIGH) && + (arcn->sb.st_mtime > pt->high_time))) { + pt = pt->fow; + continue; + } + break; + } + break; + } + + if (pt == NULL) + return(1); + return(0); +} + +/* + * str_sec() + * Convert a time string in the format of [yy[mm[dd[hh]]]]mm[.ss] to gmt + * seconds. Tval already has current time loaded into it at entry. + * Return: + * 0 if converted ok, -1 otherwise + */ + +static int +str_sec(char *str, time_t *tval) +{ + struct tm *lt; + char *dot = NULL; + + lt = localtime(tval); + if ((dot = strchr(str, '.')) != NULL) { + /* + * seconds (.ss) + */ + *dot++ = '\0'; + if (strlen(dot) != 2) + return(-1); + if ((lt->tm_sec = ATOI2(dot)) > 61) + return(-1); + } else + lt->tm_sec = 0; + + switch (strlen(str)) { + case 10: + /* + * year (yy) + * watch out for year 2000 + */ + if ((lt->tm_year = ATOI2(str)) < 69) + lt->tm_year += 100; + str += 2; + /* FALLTHROUGH */ + case 8: + /* + * month (mm) + * watch out months are from 0 - 11 internally + */ + if ((lt->tm_mon = ATOI2(str)) > 12) + return(-1); + --lt->tm_mon; + str += 2; + /* FALLTHROUGH */ + case 6: + /* + * day (dd) + */ + if ((lt->tm_mday = ATOI2(str)) > 31) + return(-1); + str += 2; + /* FALLTHROUGH */ + case 4: + /* + * hour (hh) + */ + if ((lt->tm_hour = ATOI2(str)) > 23) + return(-1); + str += 2; + /* FALLTHROUGH */ + case 2: + /* + * minute (mm) + */ + if ((lt->tm_min = ATOI2(str)) > 59) + return(-1); + break; + default: + return(-1); + } + /* + * convert broken-down time to GMT clock time seconds + */ + if ((*tval = mktime(lt)) == -1) + return(-1); + return(0); +} diff --git a/commands/pax/sel_subs.h b/commands/pax/sel_subs.h new file mode 100644 index 000000000..32c07b7df --- /dev/null +++ b/commands/pax/sel_subs.h @@ -0,0 +1,70 @@ +/*- + * Copyright (c) 1992 Keith Muller. + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Keith Muller of the University of California, San Diego. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)sel_subs.h 8.1 (Berkeley) 5/31/93 + * $FreeBSD: src/bin/pax/sel_subs.h,v 1.6 2004/04/06 20:06:48 markm Exp $ + */ + +/* + * data structure for storing uid/grp selects (-U, -G non standard options) + */ + +#define USR_TB_SZ 317 /* user selection table size */ +#define GRP_TB_SZ 317 /* user selection table size */ + +typedef struct usrt { + uid_t uid; + struct usrt *fow; /* next uid */ +} USRT; + +typedef struct grpt { + gid_t gid; + struct grpt *fow; /* next gid */ +} GRPT; + +/* + * data structure for storing user supplied time ranges (-T option) + */ + +#define ATOI2(s) ((((s)[0] - '0') * 10) + ((s)[1] - '0')) + +typedef struct time_rng { + time_t low_time; /* lower inclusive time limit */ + time_t high_time; /* higher inclusive time limit */ + int flgs; /* option flags */ +#define HASLOW 0x01 /* has lower time limit */ +#define HASHIGH 0x02 /* has higher time limit */ +#define CMPMTME 0x04 /* compare file modification time */ +#define CMPCTME 0x08 /* compare inode change time */ +#define CMPBOTH (CMPMTME|CMPCTME) /* compare inode and mod time */ + struct time_rng *fow; /* next pattern */ +} TIME_RNG; diff --git a/commands/pax/tables.c b/commands/pax/tables.c new file mode 100644 index 000000000..41db2934a --- /dev/null +++ b/commands/pax/tables.c @@ -0,0 +1,1284 @@ +/*- + * Copyright (c) 1992 Keith Muller. + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Keith Muller of the University of California, San Diego. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef lint +#if 0 +static char sccsid[] = "@(#)tables.c 8.1 (Berkeley) 5/31/93"; +#endif +#endif /* not lint */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "pax.h" +#include "tables.h" +#include "extern.h" + +/* + * Routines for controlling the contents of all the different databases pax + * keeps. Tables are dynamically created only when they are needed. The + * goal was speed and the ability to work with HUGE archives. The databases + * were kept simple, but do have complex rules for when the contents change. + * As of this writing, the POSIX library functions were more complex than + * needed for this application (pax databases have very short lifetimes and + * do not survive after pax is finished). Pax is required to handle very + * large archives. These database routines carefully combine memory usage and + * temporary file storage in ways which will not significantly impact runtime + * performance while allowing the largest possible archives to be handled. + * Trying to force the fit to the POSIX databases routines was not considered + * time well spent. + */ + +static HRDLNK **ltab = NULL; /* hard link table for detecting hard links */ +static FTM **ftab = NULL; /* file time table for updating arch */ +static NAMT **ntab = NULL; /* interactive rename storage table */ +static DEVT **dtab = NULL; /* device/inode mapping tables */ +static ATDIR **atab = NULL; /* file tree directory time reset table */ +static int dirfd = -1; /* storage for setting created dir time/mode */ +static u_long dircnt; /* entries in dir time/mode storage */ +static int ffd = -1; /* tmp file for file time table name storage */ + +static DEVT *chk_dev(dev_t, int); + +/* + * hard link table routines + * + * The hard link table tries to detect hard links to files using the device and + * inode values. We do this when writing an archive, so we can tell the format + * write routine that this file is a hard link to another file. The format + * write routine then can store this file in whatever way it wants (as a hard + * link if the format supports that like tar, or ignore this info like cpio). + * (Actually a field in the format driver table tells us if the format wants + * hard link info. if not, we do not waste time looking for them). We also use + * the same table when reading an archive. In that situation, this table is + * used by the format read routine to detect hard links from stored dev and + * inode numbers (like cpio). This will allow pax to create a link when one + * can be detected by the archive format. + */ + +/* + * lnk_start + * Creates the hard link table. + * Return: + * 0 if created, -1 if failure + */ + +int +lnk_start(void) +{ + if (ltab != NULL) + return(0); + if ((ltab = (HRDLNK **)calloc(L_TAB_SZ, sizeof(HRDLNK *))) == NULL) { + paxwarn(1, "Cannot allocate memory for hard link table"); + return(-1); + } + return(0); +} + +/* + * chk_lnk() + * Looks up entry in hard link hash table. If found, it copies the name + * of the file it is linked to (we already saw that file) into ln_name. + * lnkcnt is decremented and if goes to 1 the node is deleted from the + * database. (We have seen all the links to this file). If not found, + * we add the file to the database if it has the potential for having + * hard links to other files we may process (it has a link count > 1) + * Return: + * if found returns 1; if not found returns 0; -1 on error + */ + +int +chk_lnk(ARCHD *arcn) +{ + HRDLNK *pt; + HRDLNK **ppt; + u_int indx; + + if (ltab == NULL) + return(-1); + /* + * ignore those nodes that cannot have hard links + */ + if ((arcn->type == PAX_DIR) || (arcn->sb.st_nlink <= 1)) + return(0); + + /* + * hash inode number and look for this file + */ + indx = ((unsigned)arcn->sb.st_ino) % L_TAB_SZ; + if ((pt = ltab[indx]) != NULL) { + /* + * it's hash chain in not empty, walk down looking for it + */ + ppt = &(ltab[indx]); + while (pt != NULL) { + if ((pt->ino == arcn->sb.st_ino) && + (pt->dev == arcn->sb.st_dev)) + break; + ppt = &(pt->fow); + pt = pt->fow; + } + + if (pt != NULL) { + /* + * found a link. set the node type and copy in the + * name of the file it is to link to. we need to + * handle hardlinks to regular files differently than + * other links. + */ + arcn->ln_nlen = l_strncpy(arcn->ln_name, pt->name, + sizeof(arcn->ln_name) - 1); + arcn->ln_name[arcn->ln_nlen] = '\0'; + if (arcn->type == PAX_REG) + arcn->type = PAX_HRG; + else + arcn->type = PAX_HLK; + + /* + * if we have found all the links to this file, remove + * it from the database + */ + if (--pt->nlink <= 1) { + *ppt = pt->fow; + (void)free((char *)pt->name); + (void)free((char *)pt); + } + return(1); + } + } + + /* + * we never saw this file before. It has links so we add it to the + * front of this hash chain + */ + if ((pt = (HRDLNK *)malloc(sizeof(HRDLNK))) != NULL) { + if ((pt->name = strdup(arcn->name)) != NULL) { + pt->dev = arcn->sb.st_dev; + pt->ino = arcn->sb.st_ino; + pt->nlink = arcn->sb.st_nlink; + pt->fow = ltab[indx]; + ltab[indx] = pt; + return(0); + } + (void)free((char *)pt); + } + + paxwarn(1, "Hard link table out of memory"); + return(-1); +} + +/* + * purg_lnk + * remove reference for a file that we may have added to the data base as + * a potential source for hard links. We ended up not using the file, so + * we do not want to accidently point another file at it later on. + */ + +void +purg_lnk(ARCHD *arcn) +{ + HRDLNK *pt; + HRDLNK **ppt; + u_int indx; + + if (ltab == NULL) + return; + /* + * do not bother to look if it could not be in the database + */ + if ((arcn->sb.st_nlink <= 1) || (arcn->type == PAX_DIR) || + (arcn->type == PAX_HLK) || (arcn->type == PAX_HRG)) + return; + + /* + * find the hash chain for this inode value, if empty return + */ + indx = ((unsigned)arcn->sb.st_ino) % L_TAB_SZ; + if ((pt = ltab[indx]) == NULL) + return; + + /* + * walk down the list looking for the inode/dev pair, unlink and + * free if found + */ + ppt = &(ltab[indx]); + while (pt != NULL) { + if ((pt->ino == arcn->sb.st_ino) && + (pt->dev == arcn->sb.st_dev)) + break; + ppt = &(pt->fow); + pt = pt->fow; + } + if (pt == NULL) + return; + + /* + * remove and free it + */ + *ppt = pt->fow; + (void)free((char *)pt->name); + (void)free((char *)pt); +} + +/* + * lnk_end() + * Pull apart an existing link table so we can reuse it. We do this between + * read and write phases of append with update. (The format may have + * used the link table, and we need to start with a fresh table for the + * write phase). + */ + +void +lnk_end(void) +{ + int i; + HRDLNK *pt; + HRDLNK *ppt; + + if (ltab == NULL) + return; + + for (i = 0; i < L_TAB_SZ; ++i) { + if (ltab[i] == NULL) + continue; + pt = ltab[i]; + ltab[i] = NULL; + + /* + * free up each entry on this chain + */ + while (pt != NULL) { + ppt = pt; + pt = ppt->fow; + (void)free((char *)ppt->name); + (void)free((char *)ppt); + } + } + return; +} + +/* + * modification time table routines + * + * The modification time table keeps track of last modification times for all + * files stored in an archive during a write phase when -u is set. We only + * add a file to the archive if it is newer than a file with the same name + * already stored on the archive (if there is no other file with the same + * name on the archive it is added). This applies to writes and appends. + * An append with an -u must read the archive and store the modification time + * for every file on that archive before starting the write phase. It is clear + * that this is one HUGE database. To save memory space, the actual file names + * are stored in a scatch file and indexed by an in memory hash table. The + * hash table is indexed by hashing the file path. The nodes in the table store + * the length of the filename and the lseek offset within the scratch file + * where the actual name is stored. Since there are never any deletions to this + * table, fragmentation of the scratch file is never an issue. Lookups seem to + * not exhibit any locality at all (files in the database are rarely + * looked up more than once...). So caching is just a waste of memory. The + * only limitation is the amount of scatch file space available to store the + * path names. + */ + +/* + * ftime_start() + * create the file time hash table and open for read/write the scratch + * file. (after created it is unlinked, so when we exit we leave + * no witnesses). + * Return: + * 0 if the table and file was created ok, -1 otherwise + */ + +int +ftime_start(void) +{ + + if (ftab != NULL) + return(0); + if ((ftab = (FTM **)calloc(F_TAB_SZ, sizeof(FTM *))) == NULL) { + paxwarn(1, "Cannot allocate memory for file time table"); + return(-1); + } + + /* + * get random name and create temporary scratch file, unlink name + * so it will get removed on exit + */ + memcpy(tempbase, _TFILE_BASE, sizeof(_TFILE_BASE)); + if ((ffd = mkstemp(tempfile)) < 0) { + syswarn(1, errno, "Unable to create temporary file: %s", + tempfile); + return(-1); + } + (void)unlink(tempfile); + + return(0); +} + +/* + * chk_ftime() + * looks up entry in file time hash table. If not found, the file is + * added to the hash table and the file named stored in the scratch file. + * If a file with the same name is found, the file times are compared and + * the most recent file time is retained. If the new file was younger (or + * was not in the database) the new file is selected for storage. + * Return: + * 0 if file should be added to the archive, 1 if it should be skipped, + * -1 on error + */ + +int +chk_ftime(ARCHD *arcn) +{ + FTM *pt; + int namelen; + u_int indx; + char ckname[PAXPATHLEN+1]; + + /* + * no info, go ahead and add to archive + */ + if (ftab == NULL) + return(0); + + /* + * hash the pathname and look up in table + */ + namelen = arcn->nlen; + indx = st_hash(arcn->name, namelen, F_TAB_SZ); + if ((pt = ftab[indx]) != NULL) { + /* + * the hash chain is not empty, walk down looking for match + * only read up the path names if the lengths match, speeds + * up the search a lot + */ + while (pt != NULL) { + if (pt->namelen == namelen) { + /* + * potential match, have to read the name + * from the scratch file. + */ + if (lseek(ffd,pt->seek,SEEK_SET) != pt->seek) { + syswarn(1, errno, + "Failed ftime table seek"); + return(-1); + } + if (read(ffd, ckname, namelen) != namelen) { + syswarn(1, errno, + "Failed ftime table read"); + return(-1); + } + + /* + * if the names match, we are done + */ + if (!strncmp(ckname, arcn->name, namelen)) + break; + } + + /* + * try the next entry on the chain + */ + pt = pt->fow; + } + + if (pt != NULL) { + /* + * found the file, compare the times, save the newer + */ + if (arcn->sb.st_mtime > pt->mtime) { + /* + * file is newer + */ + pt->mtime = arcn->sb.st_mtime; + return(0); + } + /* + * file is older + */ + return(1); + } + } + + /* + * not in table, add it + */ + if ((pt = (FTM *)malloc(sizeof(FTM))) != NULL) { + /* + * add the name at the end of the scratch file, saving the + * offset. add the file to the head of the hash chain + */ + if ((pt->seek = lseek(ffd, (off_t)0, SEEK_END)) >= 0) { + if (write(ffd, arcn->name, namelen) == namelen) { + pt->mtime = arcn->sb.st_mtime; + pt->namelen = namelen; + pt->fow = ftab[indx]; + ftab[indx] = pt; + return(0); + } + syswarn(1, errno, "Failed write to file time table"); + } else + syswarn(1, errno, "Failed seek on file time table"); + } else + paxwarn(1, "File time table ran out of memory"); + + if (pt != NULL) + (void)free((char *)pt); + return(-1); +} + +/* + * Interactive rename table routines + * + * The interactive rename table keeps track of the new names that the user + * assigns to files from tty input. Since this map is unique for each file + * we must store it in case there is a reference to the file later in archive + * (a link). Otherwise we will be unable to find the file we know was + * extracted. The remapping of these files is stored in a memory based hash + * table (it is assumed since input must come from /dev/tty, it is unlikely to + * be a very large table). + */ + +/* + * name_start() + * create the interactive rename table + * Return: + * 0 if successful, -1 otherwise + */ + +int +name_start(void) +{ + if (ntab != NULL) + return(0); + if ((ntab = (NAMT **)calloc(N_TAB_SZ, sizeof(NAMT *))) == NULL) { + paxwarn(1, "Cannot allocate memory for interactive rename table"); + return(-1); + } + return(0); +} + +/* + * add_name() + * add the new name to old name mapping just created by the user. + * If an old name mapping is found (there may be duplicate names on an + * archive) only the most recent is kept. + * Return: + * 0 if added, -1 otherwise + */ + +int +add_name(char *oname, int onamelen, char *nname) +{ + NAMT *pt; + u_int indx; + + if (ntab == NULL) { + /* + * should never happen + */ + paxwarn(0, "No interactive rename table, links may fail\n"); + return(0); + } + + /* + * look to see if we have already mapped this file, if so we + * will update it + */ + indx = st_hash(oname, onamelen, N_TAB_SZ); + if ((pt = ntab[indx]) != NULL) { + /* + * look down the has chain for the file + */ + while ((pt != NULL) && (strcmp(oname, pt->oname) != 0)) + pt = pt->fow; + + if (pt != NULL) { + /* + * found an old mapping, replace it with the new one + * the user just input (if it is different) + */ + if (strcmp(nname, pt->nname) == 0) + return(0); + + (void)free((char *)pt->nname); + if ((pt->nname = strdup(nname)) == NULL) { + paxwarn(1, "Cannot update rename table"); + return(-1); + } + return(0); + } + } + + /* + * this is a new mapping, add it to the table + */ + if ((pt = (NAMT *)malloc(sizeof(NAMT))) != NULL) { + if ((pt->oname = strdup(oname)) != NULL) { + if ((pt->nname = strdup(nname)) != NULL) { + pt->fow = ntab[indx]; + ntab[indx] = pt; + return(0); + } + (void)free((char *)pt->oname); + } + (void)free((char *)pt); + } + paxwarn(1, "Interactive rename table out of memory"); + return(-1); +} + +/* + * sub_name() + * look up a link name to see if it points at a file that has been + * remapped by the user. If found, the link is adjusted to contain the + * new name (oname is the link to name) + */ + +void +sub_name(char *oname, int *onamelen, size_t onamesize) +{ + NAMT *pt; + u_int indx; + + if (ntab == NULL) + return; + /* + * look the name up in the hash table + */ + indx = st_hash(oname, *onamelen, N_TAB_SZ); + if ((pt = ntab[indx]) == NULL) + return; + + while (pt != NULL) { + /* + * walk down the hash chain looking for a match + */ + if (strcmp(oname, pt->oname) == 0) { + /* + * found it, replace it with the new name + * and return (we know that oname has enough space) + */ + *onamelen = l_strncpy(oname, pt->nname, onamesize - 1); + oname[*onamelen] = '\0'; + return; + } + pt = pt->fow; + } + + /* + * no match, just return + */ + return; +} + +/* + * device/inode mapping table routines + * (used with formats that store device and inodes fields) + * + * device/inode mapping tables remap the device field in an archive header. The + * device/inode fields are used to determine when files are hard links to each + * other. However these values have very little meaning outside of that. This + * database is used to solve one of two different problems. + * + * 1) when files are appended to an archive, while the new files may have hard + * links to each other, you cannot determine if they have hard links to any + * file already stored on the archive from a prior run of pax. We must assume + * that these inode/device pairs are unique only within a SINGLE run of pax + * (which adds a set of files to an archive). So we have to make sure the + * inode/dev pairs we add each time are always unique. We do this by observing + * while the inode field is very dense, the use of the dev field is fairly + * sparse. Within each run of pax, we remap any device number of a new archive + * member that has a device number used in a prior run and already stored in a + * file on the archive. During the read phase of the append, we store the + * device numbers used and mark them to not be used by any file during the + * write phase. If during write we go to use one of those old device numbers, + * we remap it to a new value. + * + * 2) Often the fields in the archive header used to store these values are + * too small to store the entire value. The result is an inode or device value + * which can be truncated. This really can foul up an archive. With truncation + * we end up creating links between files that are really not links (after + * truncation the inodes are the same value). We address that by detecting + * truncation and forcing a remap of the device field to split truncated + * inodes away from each other. Each truncation creates a pattern of bits that + * are removed. We use this pattern of truncated bits to partition the inodes + * on a single device to many different devices (each one represented by the + * truncated bit pattern). All inodes on the same device that have the same + * truncation pattern are mapped to the same new device. Two inodes that + * truncate to the same value clearly will always have different truncation + * bit patterns, so they will be split from away each other. When we spot + * device truncation we remap the device number to a non truncated value. + * (for more info see table.h for the data structures involved). + */ + +/* + * dev_start() + * create the device mapping table + * Return: + * 0 if successful, -1 otherwise + */ + +int +dev_start(void) +{ + if (dtab != NULL) + return(0); + if ((dtab = (DEVT **)calloc(D_TAB_SZ, sizeof(DEVT *))) == NULL) { + paxwarn(1, "Cannot allocate memory for device mapping table"); + return(-1); + } + return(0); +} + +/* + * add_dev() + * add a device number to the table. this will force the device to be + * remapped to a new value if it be used during a write phase. This + * function is called during the read phase of an append to prohibit the + * use of any device number already in the archive. + * Return: + * 0 if added ok, -1 otherwise + */ + +int +add_dev(ARCHD *arcn) +{ + if (chk_dev(arcn->sb.st_dev, 1) == NULL) + return(-1); + return(0); +} + +/* + * chk_dev() + * check for a device value in the device table. If not found and the add + * flag is set, it is added. This does NOT assign any mapping values, just + * adds the device number as one that need to be remapped. If this device + * is already mapped, just return with a pointer to that entry. + * Return: + * pointer to the entry for this device in the device map table. Null + * if the add flag is not set and the device is not in the table (it is + * not been seen yet). If add is set and the device cannot be added, null + * is returned (indicates an error). + */ + +static DEVT * +chk_dev(dev_t dev, int add) +{ + DEVT *pt; + u_int indx; + + if (dtab == NULL) + return(NULL); + /* + * look to see if this device is already in the table + */ + indx = ((unsigned)dev) % D_TAB_SZ; + if ((pt = dtab[indx]) != NULL) { + while ((pt != NULL) && (pt->dev != dev)) + pt = pt->fow; + + /* + * found it, return a pointer to it + */ + if (pt != NULL) + return(pt); + } + + /* + * not in table, we add it only if told to as this may just be a check + * to see if a device number is being used. + */ + if (add == 0) + return(NULL); + + /* + * allocate a node for this device and add it to the front of the hash + * chain. Note we do not assign remaps values here, so the pt->list + * list must be NULL. + */ + if ((pt = (DEVT *)malloc(sizeof(DEVT))) == NULL) { + paxwarn(1, "Device map table out of memory"); + return(NULL); + } + pt->dev = dev; + pt->list = NULL; + pt->fow = dtab[indx]; + dtab[indx] = pt; + return(pt); +} +/* + * map_dev() + * given an inode and device storage mask (the mask has a 1 for each bit + * the archive format is able to store in a header), we check for inode + * and device truncation and remap the device as required. Device mapping + * can also occur when during the read phase of append a device number was + * seen (and was marked as do not use during the write phase). WE ASSUME + * that unsigned longs are the same size or bigger than the fields used + * for ino_t and dev_t. If not the types will have to be changed. + * Return: + * 0 if all ok, -1 otherwise. + */ + +int +map_dev(ARCHD *arcn, u_long dev_mask, u_long ino_mask) +{ + DEVT *pt; + DLIST *dpt; + static dev_t lastdev = 0; /* next device number to try */ + int trc_ino = 0; + int trc_dev = 0; + ino_t trunc_bits = 0; + ino_t nino; + + if (dtab == NULL) + return(0); + /* + * check for device and inode truncation, and extract the truncated + * bit pattern. + */ + if ((arcn->sb.st_dev & (dev_t)dev_mask) != arcn->sb.st_dev) + ++trc_dev; + if ((nino = arcn->sb.st_ino & (ino_t)ino_mask) != arcn->sb.st_ino) { + ++trc_ino; + trunc_bits = arcn->sb.st_ino & (ino_t)(~ino_mask); + } + + /* + * see if this device is already being mapped, look up the device + * then find the truncation bit pattern which applies + */ + if ((pt = chk_dev(arcn->sb.st_dev, 0)) != NULL) { + /* + * this device is already marked to be remapped + */ + for (dpt = pt->list; dpt != NULL; dpt = dpt->fow) + if (dpt->trunc_bits == trunc_bits) + break; + + if (dpt != NULL) { + /* + * we are being remapped for this device and pattern + * change the device number to be stored and return + */ + arcn->sb.st_dev = dpt->dev; + arcn->sb.st_ino = nino; + return(0); + } + } else { + /* + * this device is not being remapped YET. if we do not have any + * form of truncation, we do not need a remap + */ + if (!trc_ino && !trc_dev) + return(0); + + /* + * we have truncation, have to add this as a device to remap + */ + if ((pt = chk_dev(arcn->sb.st_dev, 1)) == NULL) + goto bad; + + /* + * if we just have a truncated inode, we have to make sure that + * all future inodes that do not truncate (they have the + * truncation pattern of all 0's) continue to map to the same + * device number. We probably have already written inodes with + * this device number to the archive with the truncation + * pattern of all 0's. So we add the mapping for all 0's to the + * same device number. + */ + if (!trc_dev && (trunc_bits != 0)) { + if ((dpt = (DLIST *)malloc(sizeof(DLIST))) == NULL) + goto bad; + dpt->trunc_bits = 0; + dpt->dev = arcn->sb.st_dev; + dpt->fow = pt->list; + pt->list = dpt; + } + } + + /* + * look for a device number not being used. We must watch for wrap + * around on lastdev (so we do not get stuck looking forever!) + */ + while (++lastdev > 0) { + if (chk_dev(lastdev, 0) != NULL) + continue; + /* + * found an unused value. If we have reached truncation point + * for this format we are hosed, so we give up. Otherwise we + * mark it as being used. + */ + if (((lastdev & ((dev_t)dev_mask)) != lastdev) || + (chk_dev(lastdev, 1) == NULL)) + goto bad; + break; + } + + if ((lastdev <= 0) || ((dpt = (DLIST *)malloc(sizeof(DLIST))) == NULL)) + goto bad; + + /* + * got a new device number, store it under this truncation pattern. + * change the device number this file is being stored with. + */ + dpt->trunc_bits = trunc_bits; + dpt->dev = lastdev; + dpt->fow = pt->list; + pt->list = dpt; + arcn->sb.st_dev = lastdev; + arcn->sb.st_ino = nino; + return(0); + + bad: + paxwarn(1, "Unable to fix truncated inode/device field when storing %s", + arcn->name); + paxwarn(0, "Archive may create improper hard links when extracted"); + return(0); +} + +/* + * directory access/mod time reset table routines (for directories READ by pax) + * + * The pax -t flag requires that access times of archive files to be the same + * before being read by pax. For regular files, access time is restored after + * the file has been copied. This database provides the same functionality for + * directories read during file tree traversal. Restoring directory access time + * is more complex than files since directories may be read several times until + * all the descendants in their subtree are visited by fts. Directory access + * and modification times are stored during the fts pre-order visit (done + * before any descendants in the subtree is visited) and restored after the + * fts post-order visit (after all the descendants have been visited). In the + * case of premature exit from a subtree (like from the effects of -n), any + * directory entries left in this database are reset during final cleanup + * operations of pax. Entries are hashed by inode number for fast lookup. + */ + +/* + * atdir_start() + * create the directory access time database for directories READ by pax. + * Return: + * 0 is created ok, -1 otherwise. + */ + +int +atdir_start(void) +{ + if (atab != NULL) + return(0); + if ((atab = (ATDIR **)calloc(A_TAB_SZ, sizeof(ATDIR *))) == NULL) { + paxwarn(1,"Cannot allocate space for directory access time table"); + return(-1); + } + return(0); +} + + +/* + * atdir_end() + * walk through the directory access time table and reset the access time + * of any directory who still has an entry left in the database. These + * entries are for directories READ by pax + */ + +void +atdir_end(void) +{ + ATDIR *pt; + int i; + + if (atab == NULL) + return; + /* + * for each non-empty hash table entry reset all the directories + * chained there. + */ + for (i = 0; i < A_TAB_SZ; ++i) { + if ((pt = atab[i]) == NULL) + continue; + /* + * remember to force the times, set_ftime() looks at pmtime + * and patime, which only applies to things CREATED by pax, + * not read by pax. Read time reset is controlled by -t. + */ + for (; pt != NULL; pt = pt->fow) + set_ftime(pt->name, pt->mtime, pt->atime, 1); + } +} + +/* + * add_atdir() + * add a directory to the directory access time table. Table is hashed + * and chained by inode number. This is for directories READ by pax + */ + +void +add_atdir(char *fname, dev_t dev, ino_t ino, time_t mtime, time_t atime) +{ + ATDIR *pt; + u_int indx; + + if (atab == NULL) + return; + + /* + * make sure this directory is not already in the table, if so just + * return (the older entry always has the correct time). The only + * way this will happen is when the same subtree can be traversed by + * different args to pax and the -n option is aborting fts out of a + * subtree before all the post-order visits have been made). + */ + indx = ((unsigned)ino) % A_TAB_SZ; + if ((pt = atab[indx]) != NULL) { + while (pt != NULL) { + if ((pt->ino == ino) && (pt->dev == dev)) + break; + pt = pt->fow; + } + + /* + * oops, already there. Leave it alone. + */ + if (pt != NULL) + return; + } + + /* + * add it to the front of the hash chain + */ + if ((pt = (ATDIR *)malloc(sizeof(ATDIR))) != NULL) { + if ((pt->name = strdup(fname)) != NULL) { + pt->dev = dev; + pt->ino = ino; + pt->mtime = mtime; + pt->atime = atime; + pt->fow = atab[indx]; + atab[indx] = pt; + return; + } + (void)free((char *)pt); + } + + paxwarn(1, "Directory access time reset table ran out of memory"); + return; +} + +/* + * get_atdir() + * look up a directory by inode and device number to obtain the access + * and modification time you want to set to. If found, the modification + * and access time parameters are set and the entry is removed from the + * table (as it is no longer needed). These are for directories READ by + * pax + * Return: + * 0 if found, -1 if not found. + */ + +int +get_atdir(dev_t dev, ino_t ino, time_t *mtime, time_t *atime) +{ + ATDIR *pt; + ATDIR **ppt; + u_int indx; + + if (atab == NULL) + return(-1); + /* + * hash by inode and search the chain for an inode and device match + */ + indx = ((unsigned)ino) % A_TAB_SZ; + if ((pt = atab[indx]) == NULL) + return(-1); + + ppt = &(atab[indx]); + while (pt != NULL) { + if ((pt->ino == ino) && (pt->dev == dev)) + break; + /* + * no match, go to next one + */ + ppt = &(pt->fow); + pt = pt->fow; + } + + /* + * return if we did not find it. + */ + if (pt == NULL) + return(-1); + + /* + * found it. return the times and remove the entry from the table. + */ + *ppt = pt->fow; + *mtime = pt->mtime; + *atime = pt->atime; + (void)free((char *)pt->name); + (void)free((char *)pt); + return(0); +} + +/* + * directory access mode and time storage routines (for directories CREATED + * by pax). + * + * Pax requires that extracted directories, by default, have their access/mod + * times and permissions set to the values specified in the archive. During the + * actions of extracting (and creating the destination subtree during -rw copy) + * directories extracted may be modified after being created. Even worse is + * that these directories may have been created with file permissions which + * prohibits any descendants of these directories from being extracted. When + * directories are created by pax, access rights may be added to permit the + * creation of files in their subtree. Every time pax creates a directory, the + * times and file permissions specified by the archive are stored. After all + * files have been extracted (or copied), these directories have their times + * and file modes reset to the stored values. The directory info is restored in + * reverse order as entries were added to the data file from root to leaf. To + * restore atime properly, we must go backwards. The data file consists of + * records with two parts, the file name followed by a DIRDATA trailer. The + * fixed sized trailer contains the size of the name plus the off_t location in + * the file. To restore we work backwards through the file reading the trailer + * then the file name. + */ + +/* + * dir_start() + * set up the directory time and file mode storage for directories CREATED + * by pax. + * Return: + * 0 if ok, -1 otherwise + */ + +int +dir_start(void) +{ + + if (dirfd != -1) + return(0); + + /* + * unlink the file so it goes away at termination by itself + */ + memcpy(tempbase, _TFILE_BASE, sizeof(_TFILE_BASE)); + if ((dirfd = mkstemp(tempfile)) >= 0) { + (void)unlink(tempfile); + return(0); + } + paxwarn(1, "Unable to create temporary file for directory times: %s", + tempfile); + return(-1); +} + +/* + * add_dir() + * add the mode and times for a newly CREATED directory + * name is name of the directory, psb the stat buffer with the data in it, + * frc_mode is a flag that says whether to force the setting of the mode + * (ignoring the user set values for preserving file mode). Frc_mode is + * for the case where we created a file and found that the resulting + * directory was not writeable and the user asked for file modes to NOT + * be preserved. (we have to preserve what was created by default, so we + * have to force the setting at the end. this is stated explicitly in the + * pax spec) + */ + +void +add_dir(char *name, int nlen, struct stat *psb, int frc_mode) +{ + DIRDATA dblk; + + if (dirfd < 0) + return; + + /* + * get current position (where file name will start) so we can store it + * in the trailer + */ + if ((dblk.npos = lseek(dirfd, 0L, SEEK_CUR)) < 0) { + paxwarn(1,"Unable to store mode and times for directory: %s",name); + return; + } + + /* + * write the file name followed by the trailer + */ + dblk.nlen = nlen + 1; + dblk.mode = psb->st_mode & 0xffff; + dblk.mtime = psb->st_mtime; + dblk.atime = psb->st_atime; + dblk.frc_mode = frc_mode; + if ((write(dirfd, name, dblk.nlen) == dblk.nlen) && + (write(dirfd, (char *)&dblk, sizeof(dblk)) == sizeof(dblk))) { + ++dircnt; + return; + } + + paxwarn(1,"Unable to store mode and times for created directory: %s",name); + return; +} + +/* + * proc_dir() + * process all file modes and times stored for directories CREATED + * by pax + */ + +void +proc_dir(void) +{ + char name[PAXPATHLEN+1]; + DIRDATA dblk; + u_long cnt; + + if (dirfd < 0) + return; + /* + * read backwards through the file and process each directory + */ + for (cnt = 0; cnt < dircnt; ++cnt) { + /* + * read the trailer, then the file name, if this fails + * just give up. + */ + if (lseek(dirfd, -((off_t)sizeof(dblk)), SEEK_CUR) < 0) + break; + if (read(dirfd,(char *)&dblk, sizeof(dblk)) != sizeof(dblk)) + break; + if (lseek(dirfd, dblk.npos, SEEK_SET) < 0) + break; + if (read(dirfd, name, dblk.nlen) != dblk.nlen) + break; + if (lseek(dirfd, dblk.npos, SEEK_SET) < 0) + break; + + /* + * frc_mode set, make sure we set the file modes even if + * the user didn't ask for it (see file_subs.c for more info) + */ + if (pmode || dblk.frc_mode) + set_pmode(name, dblk.mode); + if (patime || pmtime) + set_ftime(name, dblk.mtime, dblk.atime, 0); + } + + (void)close(dirfd); + dirfd = -1; + if (cnt != dircnt) + paxwarn(1,"Unable to set mode and times for created directories"); + return; +} + +/* + * database independent routines + */ + +/* + * st_hash() + * hashes filenames to a u_int for hashing into a table. Looks at the tail + * end of file, as this provides far better distribution than any other + * part of the name. For performance reasons we only care about the last + * MAXKEYLEN chars (should be at LEAST large enough to pick off the file + * name). Was tested on 500,000 name file tree traversal from the root + * and gave almost a perfectly uniform distribution of keys when used with + * prime sized tables (MAXKEYLEN was 128 in test). Hashes (sizeof int) + * chars at a time and pads with 0 for last addition. + * Return: + * the hash value of the string MOD (%) the table size. + */ + +u_int +st_hash(char *name, int len, int tabsz) +{ + char *pt; + char *dest; + char *end; + int i; + u_int key = 0; + int steps; + int res; + u_int val; + + /* + * only look at the tail up to MAXKEYLEN, we do not need to waste + * time here (remember these are pathnames, the tail is what will + * spread out the keys) + */ + if (len > MAXKEYLEN) { + pt = &(name[len - MAXKEYLEN]); + len = MAXKEYLEN; + } else + pt = name; + + /* + * calculate the number of u_int size steps in the string and if + * there is a runt to deal with + */ + steps = len/sizeof(u_int); + res = len % sizeof(u_int); + + /* + * add up the value of the string in unsigned integer sized pieces + * too bad we cannot have unsigned int aligned strings, then we + * could avoid the expensive copy. + */ + for (i = 0; i < steps; ++i) { + end = pt + sizeof(u_int); + dest = (char *)&val; + while (pt < end) + *dest++ = *pt++; + key += val; + } + + /* + * add in the runt padded with zero to the right + */ + if (res) { + val = 0; + end = pt + res; + dest = (char *)&val; + while (pt < end) + *dest++ = *pt++; + key += val; + } + + /* + * return the result mod the table size + */ + return(key % tabsz); +} diff --git a/commands/pax/tables.h b/commands/pax/tables.h new file mode 100644 index 000000000..9d0c5d8d7 --- /dev/null +++ b/commands/pax/tables.h @@ -0,0 +1,169 @@ +/*- + * Copyright (c) 1992 Keith Muller. + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Keith Muller of the University of California, San Diego. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)tables.h 8.1 (Berkeley) 5/31/93 + * $FreeBSD: src/bin/pax/tables.h,v 1.10 2004/04/06 20:06:48 markm Exp $ + */ + +/* + * data structures and constants used by the different databases kept by pax + */ + +/* + * Hash Table Sizes MUST BE PRIME, if set too small performance suffers. + * Probably safe to expect 500000 inodes per tape. Assuming good key + * distribution (inodes) chains of under 50 long (worse case) is ok. + */ +#define L_TAB_SZ 2503 /* hard link hash table size */ +#define F_TAB_SZ 50503 /* file time hash table size */ +#define N_TAB_SZ 541 /* interactive rename hash table */ +#define D_TAB_SZ 317 /* unique device mapping table */ +#define A_TAB_SZ 317 /* ftree dir access time reset table */ +#define MAXKEYLEN 64 /* max number of chars for hash */ + +/* + * file hard link structure (hashed by dev/ino and chained) used to find the + * hard links in a file system or with some archive formats (cpio) + */ +typedef struct hrdlnk { + char *name; /* name of first file seen with this ino/dev */ + dev_t dev; /* files device number */ + ino_t ino; /* files inode number */ + u_long nlink; /* expected link count */ + struct hrdlnk *fow; +} HRDLNK; + +/* + * Archive write update file time table (the -u, -C flag), hashed by filename. + * Filenames are stored in a scratch file at seek offset into the file. The + * file time (mod time) and the file name length (for a quick check) are + * stored in a hash table node. We were forced to use a scratch file because + * with -u, the mtime for every node in the archive must always be available + * to compare against (and this data can get REALLY large with big archives). + * By being careful to read only when we have a good chance of a match, the + * performance loss is not measurable (and the size of the archive we can + * handle is greatly increased). + */ +typedef struct ftm { + int namelen; /* file name length */ + time_t mtime; /* files last modification time */ + off_t seek; /* location in scratch file */ + struct ftm *fow; +} FTM; + +/* + * Interactive rename table (-i flag), hashed by orig filename. + * We assume this will not be a large table as this mapping data can only be + * obtained through interactive input by the user. Nobody is going to type in + * changes for 500000 files? We use chaining to resolve collisions. + */ + +typedef struct namt { + char *oname; /* old name */ + char *nname; /* new name typed in by the user */ + struct namt *fow; +} NAMT; + +/* + * Unique device mapping tables. Some protocols (e.g. cpio) require that the + * pair will uniquely identify a file in an archive unless they + * are links to the same file. Appending to archives can break this. For those + * protocols that have this requirement we map c_dev to a unique value not seen + * in the archive when we append. We also try to handle inode truncation with + * this table. (When the inode field in the archive header are too small, we + * remap the dev on writes to remove accidental collisions). + * + * The list is hashed by device number using chain collision resolution. Off of + * each DEVT are linked the various remaps for this device based on those bits + * in the inode which were truncated. For example if we are just remapping to + * avoid a device number during an update append, off the DEVT we would have + * only a single DLIST that has a truncation id of 0 (no inode bits were + * stripped for this device so far). When we spot inode truncation we create + * a new mapping based on the set of bits in the inode which were stripped off. + * so if the top four bits of the inode are stripped and they have a pattern of + * 0110...... (where . are those bits not truncated) we would have a mapping + * assigned for all inodes that has the same 0110.... pattern (with this dev + * number of course). This keeps the mapping sparse and should be able to store + * close to the limit of files which can be represented by the optimal + * combination of dev and inode bits, and without creating a fouled up archive. + * Note we also remap truncated devs in the same way (an exercise for the + * dedicated reader; always wanted to say that...:) + */ + +typedef struct devt { + dev_t dev; /* the orig device number we now have to map */ + struct devt *fow; /* new device map list */ + struct dlist *list; /* map list based on inode truncation bits */ +} DEVT; + +typedef struct dlist { + ino_t trunc_bits; /* truncation pattern for a specific map */ + dev_t dev; /* the new device id we use */ + struct dlist *fow; +} DLIST; + +/* + * ftree directory access time reset table. When we are done with with a + * subtree we reset the access and mod time of the directory when the tflag is + * set. Not really explicitly specified in the pax spec, but easy and fast to + * do (and this may have even been intended in the spec, it is not clear). + * table is hashed by inode with chaining. + */ + +typedef struct atdir { + char *name; /* name of directory to reset */ + dev_t dev; /* dev and inode for fast lookup */ + ino_t ino; + time_t mtime; /* access and mod time to reset to */ + time_t atime; + struct atdir *fow; +} ATDIR; + +/* + * created directory time and mode storage entry. After pax is finished during + * extraction or copy, we must reset directory access modes and times that + * may have been modified after creation (they no longer have the specified + * times and/or modes). We must reset time in the reverse order of creation, + * because entries are added from the top of the file tree to the bottom. + * We MUST reset times from leaf to root (it will not work the other + * direction). Entries are recorded into a spool file to make reverse + * reading faster. + */ + +typedef struct dirdata { + int nlen; /* length of the directory name (includes \0) */ + off_t npos; /* position in file where this dir name starts */ + mode_t mode; /* file mode to restore */ + time_t mtime; /* mtime to set */ + time_t atime; /* atime to set */ + int frc_mode; /* do we force mode settings? */ +} DIRDATA; diff --git a/commands/pax/tar.c b/commands/pax/tar.c new file mode 100644 index 000000000..0ea3d9104 --- /dev/null +++ b/commands/pax/tar.c @@ -0,0 +1,1121 @@ +/*- + * Copyright (c) 1992 Keith Muller. + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Keith Muller of the University of California, San Diego. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef lint +#if 0 +static char sccsid[] = "@(#)tar.c 8.2 (Berkeley) 4/18/94"; +#endif +#endif /* not lint */ + +#include +#include +#include +#include +#include +#include +#include +#include "pax.h" +#include "extern.h" +#include "tar.h" + +/* + * Routines for reading, writing and header identify of various versions of tar + */ + +static u_long tar_chksm(char *, int); +static char *name_split(char *, int); +static int ul_oct(u_long, char *, int, int); +#ifndef NET2_STAT +static int uqd_oct(u_quad_t, char *, int, int); +#endif + +/* + * Routines common to all versions of tar + */ + +static int tar_nodir; /* do not write dirs under old tar */ + +/* + * tar_endwr() + * add the tar trailer of two null blocks + * Return: + * 0 if ok, -1 otherwise (what wr_skip returns) + */ + +int +tar_endwr(void) +{ + return(wr_skip((off_t)(NULLCNT*BLKMULT))); +} + +/* + * tar_endrd() + * no cleanup needed here, just return size of trailer (for append) + * Return: + * size of trailer (2 * BLKMULT) + */ + +off_t +tar_endrd(void) +{ + return((off_t)(NULLCNT*BLKMULT)); +} + +/* + * tar_trail() + * Called to determine if a header block is a valid trailer. We are passed + * the block, the in_sync flag (which tells us we are in resync mode; + * looking for a valid header), and cnt (which starts at zero) which is + * used to count the number of empty blocks we have seen so far. + * Return: + * 0 if a valid trailer, -1 if not a valid trailer, or 1 if the block + * could never contain a header. + */ + +int +tar_trail(char *buf, int in_resync, int *cnt) +{ + int i; + + /* + * look for all zero, trailer is two consecutive blocks of zero + */ + for (i = 0; i < BLKMULT; ++i) { + if (buf[i] != '\0') + break; + } + + /* + * if not all zero it is not a trailer, but MIGHT be a header. + */ + if (i != BLKMULT) + return(-1); + + /* + * When given a zero block, we must be careful! + * If we are not in resync mode, check for the trailer. Have to watch + * out that we do not mis-identify file data as the trailer, so we do + * NOT try to id a trailer during resync mode. During resync mode we + * might as well throw this block out since a valid header can NEVER be + * a block of all 0 (we must have a valid file name). + */ + if (!in_resync && (++*cnt >= NULLCNT)) + return(0); + return(1); +} + +/* + * ul_oct() + * convert an unsigned long to an octal string. many oddball field + * termination characters are used by the various versions of tar in the + * different fields. term selects which kind to use. str is '0' padded + * at the front to len. we are unable to use only one format as many old + * tar readers are very cranky about this. + * Return: + * 0 if the number fit into the string, -1 otherwise + */ + +static int +ul_oct(u_long val, char *str, int len, int term) +{ + char *pt; + + /* + * term selects the appropriate character(s) for the end of the string + */ + pt = str + len - 1; + switch(term) { + case 3: + *pt-- = '\0'; + break; + case 2: + *pt-- = ' '; + *pt-- = '\0'; + break; + case 1: + *pt-- = ' '; + break; + case 0: + default: + *pt-- = '\0'; + *pt-- = ' '; + break; + } + + /* + * convert and blank pad if there is space + */ + while (pt >= str) { + *pt-- = '0' + (char)(val & 0x7); + if ((val = val >> 3) == (u_long)0) + break; + } + + while (pt >= str) + *pt-- = '0'; + if (val != (u_long)0) + return(-1); + return(0); +} + +#ifndef NET2_STAT +/* + * uqd_oct() + * convert an u_quad_t to an octal string. one of many oddball field + * termination characters are used by the various versions of tar in the + * different fields. term selects which kind to use. str is '0' padded + * at the front to len. we are unable to use only one format as many old + * tar readers are very cranky about this. + * Return: + * 0 if the number fit into the string, -1 otherwise + */ + +static int +uqd_oct(u_quad_t val, char *str, int len, int term) +{ + char *pt; + + /* + * term selects the appropriate character(s) for the end of the string + */ + pt = str + len - 1; + switch(term) { + case 3: + *pt-- = '\0'; + break; + case 2: + *pt-- = ' '; + *pt-- = '\0'; + break; + case 1: + *pt-- = ' '; + break; + case 0: + default: + *pt-- = '\0'; + *pt-- = ' '; + break; + } + + /* + * convert and blank pad if there is space + */ + while (pt >= str) { + *pt-- = '0' + (char)(val & 0x7); + if ((val = val >> 3) == 0) + break; + } + + while (pt >= str) + *pt-- = '0'; + if (val != (u_quad_t)0) + return(-1); + return(0); +} +#endif + +/* + * tar_chksm() + * calculate the checksum for a tar block counting the checksum field as + * all blanks (BLNKSUM is that value pre-calculated, the sum of 8 blanks). + * NOTE: we use len to short circuit summing 0's on write since we ALWAYS + * pad headers with 0. + * Return: + * unsigned long checksum + */ + +static u_long +tar_chksm(char *blk, int len) +{ + char *stop; + char *pt; + u_long chksm = BLNKSUM; /* initial value is checksum field sum */ + + /* + * add the part of the block before the checksum field + */ + pt = blk; + stop = blk + CHK_OFFSET; + while (pt < stop) + chksm += (u_long)(*pt++ & 0xff); + /* + * move past the checksum field and keep going, spec counts the + * checksum field as the sum of 8 blanks (which is pre-computed as + * BLNKSUM). + * ASSUMED: len is greater than CHK_OFFSET. (len is where our 0 padding + * starts, no point in summing zero's) + */ + pt += CHK_LEN; + stop = blk + len; + while (pt < stop) + chksm += (u_long)(*pt++ & 0xff); + return(chksm); +} + +/* + * Routines for old BSD style tar (also made portable to sysV tar) + */ + +/* + * tar_id() + * determine if a block given to us is a valid tar header (and not a USTAR + * header). We have to be on the lookout for those pesky blocks of all + * zero's. + * Return: + * 0 if a tar header, -1 otherwise + */ + +int +tar_id(char *blk, int size) +{ + HD_TAR *hd; + HD_USTAR *uhd; + + if (size < BLKMULT) + return(-1); + hd = (HD_TAR *)blk; + uhd = (HD_USTAR *)blk; + + /* + * check for block of zero's first, a simple and fast test, then make + * sure this is not a ustar header by looking for the ustar magic + * cookie. We should use TMAGLEN, but some USTAR archive programs are + * wrong and create archives missing the \0. Last we check the + * checksum. If this is ok we have to assume it is a valid header. + */ + if (hd->name[0] == '\0') + return(-1); + if (strncmp(uhd->magic, TMAGIC, TMAGLEN - 1) == 0) + return(-1); + if (asc_ul(hd->chksum,sizeof(hd->chksum),OCT) != tar_chksm(blk,BLKMULT)) + return(-1); + return(0); +} + +/* + * tar_opt() + * handle tar format specific -o options + * Return: + * 0 if ok -1 otherwise + */ + +int +tar_opt(void) +{ + OPLIST *opt; + + while ((opt = opt_next()) != NULL) { + if (strcmp(opt->name, TAR_OPTION) || + strcmp(opt->value, TAR_NODIR)) { + paxwarn(1, "Unknown tar format -o option/value pair %s=%s", + opt->name, opt->value); + paxwarn(1,"%s=%s is the only supported tar format option", + TAR_OPTION, TAR_NODIR); + return(-1); + } + + /* + * we only support one option, and only when writing + */ + if ((act != APPND) && (act != ARCHIVE)) { + paxwarn(1, "%s=%s is only supported when writing.", + opt->name, opt->value); + return(-1); + } + tar_nodir = 1; + } + return(0); +} + + +/* + * tar_rd() + * extract the values out of block already determined to be a tar header. + * store the values in the ARCHD parameter. + * Return: + * 0 + */ + +int +tar_rd(ARCHD *arcn, char *buf) +{ + HD_TAR *hd; + char *pt; + + /* + * we only get proper sized buffers passed to us + */ + if (tar_id(buf, BLKMULT) < 0) + return(-1); + arcn->org_name = arcn->name; + arcn->sb.st_nlink = 1; + arcn->pat = NULL; + + /* + * copy out the name and values in the stat buffer + */ + hd = (HD_TAR *)buf; + /* + * old tar format specifies the name always be null-terminated, + * but let's be robust to broken archives. + * the same applies to handling links below. + */ + arcn->nlen = l_strncpy(arcn->name, hd->name, + MIN(sizeof(hd->name), sizeof(arcn->name)) - 1); + arcn->name[arcn->nlen] = '\0'; + arcn->sb.st_mode = (mode_t)(asc_ul(hd->mode,sizeof(hd->mode),OCT) & + 0xfff); + arcn->sb.st_uid = (uid_t)asc_ul(hd->uid, sizeof(hd->uid), OCT); + arcn->sb.st_gid = (gid_t)asc_ul(hd->gid, sizeof(hd->gid), OCT); +#ifdef NET2_STAT + arcn->sb.st_size = (off_t)asc_ul(hd->size, sizeof(hd->size), OCT); + arcn->sb.st_mtime = (time_t)asc_ul(hd->mtime, sizeof(hd->mtime), OCT); +#else + arcn->sb.st_size = (off_t)asc_uqd(hd->size, sizeof(hd->size), OCT); + arcn->sb.st_mtime = (time_t)asc_uqd(hd->mtime, sizeof(hd->mtime), OCT); +#endif + arcn->sb.st_ctime = arcn->sb.st_atime = arcn->sb.st_mtime; + + /* + * have to look at the last character, it may be a '/' and that is used + * to encode this as a directory + */ + pt = &(arcn->name[arcn->nlen - 1]); + arcn->pad = 0; + arcn->skip = 0; + switch(hd->linkflag) { + case SYMTYPE: + /* + * symbolic link, need to get the link name and set the type in + * the st_mode so -v printing will look correct. + */ + arcn->type = PAX_SLK; + arcn->ln_nlen = l_strncpy(arcn->ln_name, hd->linkname, + MIN(sizeof(hd->linkname), sizeof(arcn->ln_name)) - 1); + arcn->ln_name[arcn->ln_nlen] = '\0'; + arcn->sb.st_mode |= S_IFLNK; + break; + case LNKTYPE: + /* + * hard link, need to get the link name, set the type in the + * st_mode and st_nlink so -v printing will look better. + */ + arcn->type = PAX_HLK; + arcn->sb.st_nlink = 2; + arcn->ln_nlen = l_strncpy(arcn->ln_name, hd->linkname, + MIN(sizeof(hd->linkname), sizeof(arcn->ln_name)) - 1); + arcn->ln_name[arcn->ln_nlen] = '\0'; + + /* + * no idea of what type this thing really points at, but + * we set something for printing only. + */ + arcn->sb.st_mode |= S_IFREG; + break; + case DIRTYPE: + /* + * It is a directory, set the mode for -v printing + */ + arcn->type = PAX_DIR; + arcn->sb.st_mode |= S_IFDIR; + arcn->sb.st_nlink = 2; + arcn->ln_name[0] = '\0'; + arcn->ln_nlen = 0; + break; + case AREGTYPE: + case REGTYPE: + default: + /* + * If we have a trailing / this is a directory and NOT a file. + */ + arcn->ln_name[0] = '\0'; + arcn->ln_nlen = 0; + if (*pt == '/') { + /* + * it is a directory, set the mode for -v printing + */ + arcn->type = PAX_DIR; + arcn->sb.st_mode |= S_IFDIR; + arcn->sb.st_nlink = 2; + } else { + /* + * have a file that will be followed by data. Set the + * skip value to the size field and calculate the size + * of the padding. + */ + arcn->type = PAX_REG; + arcn->sb.st_mode |= S_IFREG; + arcn->pad = TAR_PAD(arcn->sb.st_size); + arcn->skip = arcn->sb.st_size; + } + break; + } + + /* + * strip off any trailing slash. + */ + if (*pt == '/') { + *pt = '\0'; + --arcn->nlen; + } + return(0); +} + +/* + * tar_wr() + * write a tar header for the file specified in the ARCHD to the archive. + * Have to check for file types that cannot be stored and file names that + * are too long. Be careful of the term (last arg) to ul_oct, each field + * of tar has it own spec for the termination character(s). + * ASSUMED: space after header in header block is zero filled + * Return: + * 0 if file has data to be written after the header, 1 if file has NO + * data to write after the header, -1 if archive write failed + */ + +int +tar_wr(ARCHD *arcn) +{ + HD_TAR *hd; + int len; + char hdblk[sizeof(HD_TAR)]; + + /* + * check for those file system types which tar cannot store + */ + switch(arcn->type) { + case PAX_DIR: + /* + * user asked that dirs not be written to the archive + */ + if (tar_nodir) + return(1); + break; + case PAX_CHR: + paxwarn(1, "Tar cannot archive a character device %s", + arcn->org_name); + return(1); + case PAX_BLK: + paxwarn(1, "Tar cannot archive a block device %s", arcn->org_name); + return(1); + case PAX_SCK: + paxwarn(1, "Tar cannot archive a socket %s", arcn->org_name); + return(1); + case PAX_FIF: + paxwarn(1, "Tar cannot archive a fifo %s", arcn->org_name); + return(1); + case PAX_SLK: + case PAX_HLK: + case PAX_HRG: + if (arcn->ln_nlen >= (int)sizeof(hd->linkname)) { + paxwarn(1,"Link name too long for tar %s", arcn->ln_name); + return(1); + } + break; + case PAX_REG: + case PAX_CTG: + default: + break; + } + + /* + * check file name len, remember extra char for dirs (the / at the end) + */ + len = arcn->nlen; + if (arcn->type == PAX_DIR) + ++len; + if (len >= (int)sizeof(hd->name)) { + paxwarn(1, "File name too long for tar %s", arcn->name); + return(1); + } + + /* + * copy the data out of the ARCHD into the tar header based on the type + * of the file. Remember many tar readers want the unused fields to be + * padded with zero. We set the linkflag field (type), the linkname + * (or zero if not used),the size, and set the padding (if any) to be + * added after the file data (0 for all other types, as they only have + * a header) + */ + hd = (HD_TAR *)hdblk; + l_strncpy(hd->name, arcn->name, sizeof(hd->name) - 1); + hd->name[sizeof(hd->name) - 1] = '\0'; + arcn->pad = 0; + + if (arcn->type == PAX_DIR) { + /* + * directories are the same as files, except have a filename + * that ends with a /, we add the slash here. No data follows, + * dirs, so no pad. + */ + hd->linkflag = AREGTYPE; + memset(hd->linkname, 0, sizeof(hd->linkname)); + hd->name[len-1] = '/'; + if (ul_oct((u_long)0L, hd->size, sizeof(hd->size), 1)) + goto out; + } else if (arcn->type == PAX_SLK) { + /* + * no data follows this file, so no pad + */ + hd->linkflag = SYMTYPE; + l_strncpy(hd->linkname,arcn->ln_name, sizeof(hd->linkname) - 1); + hd->linkname[sizeof(hd->linkname) - 1] = '\0'; + if (ul_oct((u_long)0L, hd->size, sizeof(hd->size), 1)) + goto out; + } else if ((arcn->type == PAX_HLK) || (arcn->type == PAX_HRG)) { + /* + * no data follows this file, so no pad + */ + hd->linkflag = LNKTYPE; + l_strncpy(hd->linkname,arcn->ln_name, sizeof(hd->linkname) - 1); + hd->linkname[sizeof(hd->linkname) - 1] = '\0'; + if (ul_oct((u_long)0L, hd->size, sizeof(hd->size), 1)) + goto out; + } else { + /* + * data follows this file, so set the pad + */ + hd->linkflag = AREGTYPE; + memset(hd->linkname, 0, sizeof(hd->linkname)); +# ifdef NET2_STAT + if (ul_oct((u_long)arcn->sb.st_size, hd->size, + sizeof(hd->size), 1)) { +# else + if (uqd_oct((u_quad_t)arcn->sb.st_size, hd->size, + sizeof(hd->size), 1)) { +# endif + paxwarn(1,"File is too large for tar %s", arcn->org_name); + return(1); + } + arcn->pad = TAR_PAD(arcn->sb.st_size); + } + + /* + * copy those fields that are independent of the type + */ + if (ul_oct((u_long)arcn->sb.st_mode, hd->mode, sizeof(hd->mode), 0) || + ul_oct((u_long)arcn->sb.st_uid, hd->uid, sizeof(hd->uid), 0) || + ul_oct((u_long)arcn->sb.st_gid, hd->gid, sizeof(hd->gid), 0) || + ul_oct((u_long)arcn->sb.st_mtime, hd->mtime, sizeof(hd->mtime), 1)) + goto out; + + /* + * calculate and add the checksum, then write the header. A return of + * 0 tells the caller to now write the file data, 1 says no data needs + * to be written + */ + if (ul_oct(tar_chksm(hdblk, sizeof(HD_TAR)), hd->chksum, + sizeof(hd->chksum), 3)) + goto out; + if (wr_rdbuf(hdblk, sizeof(HD_TAR)) < 0) + return(-1); + if (wr_skip((off_t)(BLKMULT - sizeof(HD_TAR))) < 0) + return(-1); + if ((arcn->type == PAX_CTG) || (arcn->type == PAX_REG)) + return(0); + return(1); + + out: + /* + * header field is out of range + */ + paxwarn(1, "Tar header field is too small for %s", arcn->org_name); + return(1); +} + +/* + * Routines for POSIX ustar + */ + +/* + * ustar_strd() + * initialization for ustar read + * Return: + * 0 if ok, -1 otherwise + */ + +int +ustar_strd(void) +{ + if ((usrtb_start() < 0) || (grptb_start() < 0)) + return(-1); + return(0); +} + +/* + * ustar_stwr() + * initialization for ustar write + * Return: + * 0 if ok, -1 otherwise + */ + +int +ustar_stwr(void) +{ + if ((uidtb_start() < 0) || (gidtb_start() < 0)) + return(-1); + return(0); +} + +/* + * ustar_id() + * determine if a block given to us is a valid ustar header. We have to + * be on the lookout for those pesky blocks of all zero's + * Return: + * 0 if a ustar header, -1 otherwise + */ + +int +ustar_id(char *blk, int size) +{ + HD_USTAR *hd; + + if (size < BLKMULT) + return(-1); + hd = (HD_USTAR *)blk; + + /* + * check for block of zero's first, a simple and fast test then check + * ustar magic cookie. We should use TMAGLEN, but some USTAR archive + * programs are fouled up and create archives missing the \0. Last we + * check the checksum. If ok we have to assume it is a valid header. + */ + if (hd->name[0] == '\0') + return(-1); + if (strncmp(hd->magic, TMAGIC, TMAGLEN - 1) != 0) + return(-1); + if (asc_ul(hd->chksum,sizeof(hd->chksum),OCT) != tar_chksm(blk,BLKMULT)) + return(-1); + return(0); +} + +/* + * ustar_rd() + * extract the values out of block already determined to be a ustar header. + * store the values in the ARCHD parameter. + * Return: + * 0 + */ + +int +ustar_rd(ARCHD *arcn, char *buf) +{ + HD_USTAR *hd; + char *dest; + int cnt = 0; + dev_t devmajor; + dev_t devminor; + + /* + * we only get proper sized buffers + */ + if (ustar_id(buf, BLKMULT) < 0) + return(-1); + arcn->org_name = arcn->name; + arcn->sb.st_nlink = 1; + arcn->pat = NULL; + arcn->nlen = 0; + hd = (HD_USTAR *)buf; + + /* + * see if the filename is split into two parts. if, so joint the parts. + * we copy the prefix first and add a / between the prefix and name. + */ + dest = arcn->name; + if (*(hd->prefix) != '\0') { + cnt = l_strncpy(dest, hd->prefix, + MIN(sizeof(hd->prefix), sizeof(arcn->name) - 2)); + dest += cnt; + *dest++ = '/'; + cnt++; + } + /* + * ustar format specifies the name may be unterminated + * if it fills the entire field. this also applies to + * the prefix and the linkname. + */ + arcn->nlen = cnt + l_strncpy(dest, hd->name, + MIN(sizeof(hd->name), sizeof(arcn->name) - cnt - 1)); + arcn->name[arcn->nlen] = '\0'; + + /* + * follow the spec to the letter. we should only have mode bits, strip + * off all other crud we may be passed. + */ + arcn->sb.st_mode = (mode_t)(asc_ul(hd->mode, sizeof(hd->mode), OCT) & + 0xfff); +#ifdef NET2_STAT + arcn->sb.st_size = (off_t)asc_ul(hd->size, sizeof(hd->size), OCT); + arcn->sb.st_mtime = (time_t)asc_ul(hd->mtime, sizeof(hd->mtime), OCT); +#else + arcn->sb.st_size = (off_t)asc_uqd(hd->size, sizeof(hd->size), OCT); + arcn->sb.st_mtime = (time_t)asc_uqd(hd->mtime, sizeof(hd->mtime), OCT); +#endif + arcn->sb.st_ctime = arcn->sb.st_atime = arcn->sb.st_mtime; + + /* + * If we can find the ascii names for gname and uname in the password + * and group files we will use the uid's and gid they bind. Otherwise + * we use the uid and gid values stored in the header. (This is what + * the POSIX spec wants). + */ + hd->gname[sizeof(hd->gname) - 1] = '\0'; + if (gid_name(hd->gname, &(arcn->sb.st_gid)) < 0) + arcn->sb.st_gid = (gid_t)asc_ul(hd->gid, sizeof(hd->gid), OCT); + hd->uname[sizeof(hd->uname) - 1] = '\0'; + if (uid_name(hd->uname, &(arcn->sb.st_uid)) < 0) + arcn->sb.st_uid = (uid_t)asc_ul(hd->uid, sizeof(hd->uid), OCT); + + /* + * set the defaults, these may be changed depending on the file type + */ + arcn->ln_name[0] = '\0'; + arcn->ln_nlen = 0; + arcn->pad = 0; + arcn->skip = 0; + arcn->sb.st_rdev = (dev_t)0; + + /* + * set the mode and PAX type according to the typeflag in the header + */ + switch(hd->typeflag) { + case FIFOTYPE: + arcn->type = PAX_FIF; + arcn->sb.st_mode |= S_IFIFO; + break; + case DIRTYPE: + arcn->type = PAX_DIR; + arcn->sb.st_mode |= S_IFDIR; + arcn->sb.st_nlink = 2; + + /* + * Some programs that create ustar archives append a '/' + * to the pathname for directories. This clearly violates + * ustar specs, but we will silently strip it off anyway. + */ + if (arcn->name[arcn->nlen - 1] == '/') + arcn->name[--arcn->nlen] = '\0'; + break; + case BLKTYPE: + case CHRTYPE: + /* + * this type requires the rdev field to be set. + */ + if (hd->typeflag == BLKTYPE) { + arcn->type = PAX_BLK; + arcn->sb.st_mode |= S_IFBLK; + } else { + arcn->type = PAX_CHR; + arcn->sb.st_mode |= S_IFCHR; + } + devmajor = (dev_t)asc_ul(hd->devmajor,sizeof(hd->devmajor),OCT); + devminor = (dev_t)asc_ul(hd->devminor,sizeof(hd->devminor),OCT); + arcn->sb.st_rdev = TODEV(devmajor, devminor); + break; + case SYMTYPE: + case LNKTYPE: + if (hd->typeflag == SYMTYPE) { + arcn->type = PAX_SLK; + arcn->sb.st_mode |= S_IFLNK; + } else { + arcn->type = PAX_HLK; + /* + * so printing looks better + */ + arcn->sb.st_mode |= S_IFREG; + arcn->sb.st_nlink = 2; + } + /* + * copy the link name + */ + arcn->ln_nlen = l_strncpy(arcn->ln_name, hd->linkname, + MIN(sizeof(hd->linkname), sizeof(arcn->ln_name) - 1)); + arcn->ln_name[arcn->ln_nlen] = '\0'; + break; + case CONTTYPE: + case AREGTYPE: + case REGTYPE: + default: + /* + * these types have file data that follows. Set the skip and + * pad fields. + */ + arcn->type = PAX_REG; + arcn->pad = TAR_PAD(arcn->sb.st_size); + arcn->skip = arcn->sb.st_size; + arcn->sb.st_mode |= S_IFREG; + break; + } + return(0); +} + +/* + * ustar_wr() + * write a ustar header for the file specified in the ARCHD to the archive + * Have to check for file types that cannot be stored and file names that + * are too long. Be careful of the term (last arg) to ul_oct, we only use + * '\0' for the termination character (this is different than picky tar) + * ASSUMED: space after header in header block is zero filled + * Return: + * 0 if file has data to be written after the header, 1 if file has NO + * data to write after the header, -1 if archive write failed + */ + +int +ustar_wr(ARCHD *arcn) +{ + HD_USTAR *hd; + char *pt; + char hdblk[sizeof(HD_USTAR)]; + + /* + * check for those file system types ustar cannot store + */ + if (arcn->type == PAX_SCK) { + paxwarn(1, "Ustar cannot archive a socket %s", arcn->org_name); + return(1); + } + + /* + * check the length of the linkname + */ + if (((arcn->type == PAX_SLK) || (arcn->type == PAX_HLK) || + (arcn->type == PAX_HRG)) && + (arcn->ln_nlen > (int)sizeof(hd->linkname))) { + paxwarn(1, "Link name too long for ustar %s", arcn->ln_name); + return(1); + } + + /* + * split the path name into prefix and name fields (if needed). if + * pt != arcn->name, the name has to be split + */ + if ((pt = name_split(arcn->name, arcn->nlen)) == NULL) { + paxwarn(1, "File name too long for ustar %s", arcn->name); + return(1); + } + hd = (HD_USTAR *)hdblk; + arcn->pad = 0L; + + /* + * split the name, or zero out the prefix + */ + if (pt != arcn->name) { + /* + * name was split, pt points at the / where the split is to + * occur, we remove the / and copy the first part to the prefix + */ + *pt = '\0'; + l_strncpy(hd->prefix, arcn->name, sizeof(hd->prefix)); + *pt++ = '/'; + } else + memset(hd->prefix, 0, sizeof(hd->prefix)); + + /* + * copy the name part. this may be the whole path or the part after + * the prefix. both the name and prefix may fill the entire field. + */ + l_strncpy(hd->name, pt, sizeof(hd->name)); + + /* + * set the fields in the header that are type dependent + */ + switch(arcn->type) { + case PAX_DIR: + hd->typeflag = DIRTYPE; + memset(hd->linkname, 0, sizeof(hd->linkname)); + memset(hd->devmajor, 0, sizeof(hd->devmajor)); + memset(hd->devminor, 0, sizeof(hd->devminor)); + if (ul_oct((u_long)0L, hd->size, sizeof(hd->size), 3)) + goto out; + break; + case PAX_CHR: + case PAX_BLK: + if (arcn->type == PAX_CHR) + hd->typeflag = CHRTYPE; + else + hd->typeflag = BLKTYPE; + memset(hd->linkname, 0, sizeof(hd->linkname)); + if (ul_oct((u_long)major(arcn->sb.st_rdev), hd->devmajor, + sizeof(hd->devmajor), 3) || + ul_oct((u_long)minor(arcn->sb.st_rdev), hd->devminor, + sizeof(hd->devminor), 3) || + ul_oct((u_long)0L, hd->size, sizeof(hd->size), 3)) + goto out; + break; + case PAX_FIF: + hd->typeflag = FIFOTYPE; + memset(hd->linkname, 0, sizeof(hd->linkname)); + memset(hd->devmajor, 0, sizeof(hd->devmajor)); + memset(hd->devminor, 0, sizeof(hd->devminor)); + if (ul_oct((u_long)0L, hd->size, sizeof(hd->size), 3)) + goto out; + break; + case PAX_SLK: + case PAX_HLK: + case PAX_HRG: + if (arcn->type == PAX_SLK) + hd->typeflag = SYMTYPE; + else + hd->typeflag = LNKTYPE; + /* the link name may occupy the entire field in ustar */ + l_strncpy(hd->linkname,arcn->ln_name, sizeof(hd->linkname)); + memset(hd->devmajor, 0, sizeof(hd->devmajor)); + memset(hd->devminor, 0, sizeof(hd->devminor)); + if (ul_oct((u_long)0L, hd->size, sizeof(hd->size), 3)) + goto out; + break; + case PAX_REG: + case PAX_CTG: + default: + /* + * file data with this type, set the padding + */ + if (arcn->type == PAX_CTG) + hd->typeflag = CONTTYPE; + else + hd->typeflag = REGTYPE; + memset(hd->linkname, 0, sizeof(hd->linkname)); + memset(hd->devmajor, 0, sizeof(hd->devmajor)); + memset(hd->devminor, 0, sizeof(hd->devminor)); + arcn->pad = TAR_PAD(arcn->sb.st_size); +# ifdef NET2_STAT + if (ul_oct((u_long)arcn->sb.st_size, hd->size, + sizeof(hd->size), 3)) { +# else + if (uqd_oct((u_quad_t)arcn->sb.st_size, hd->size, + sizeof(hd->size), 3)) { +# endif + paxwarn(1,"File is too long for ustar %s",arcn->org_name); + return(1); + } + break; + } + + l_strncpy(hd->magic, TMAGIC, TMAGLEN); + l_strncpy(hd->version, TVERSION, TVERSLEN); + + /* + * set the remaining fields. Some versions want all 16 bits of mode + * we better humor them (they really do not meet spec though).... + */ + if (ul_oct((u_long)arcn->sb.st_mode, hd->mode, sizeof(hd->mode), 3) || + ul_oct((u_long)arcn->sb.st_uid, hd->uid, sizeof(hd->uid), 3) || + ul_oct((u_long)arcn->sb.st_gid, hd->gid, sizeof(hd->gid), 3) || + ul_oct((u_long)arcn->sb.st_mtime,hd->mtime,sizeof(hd->mtime),3)) + goto out; + l_strncpy(hd->uname,name_uid(arcn->sb.st_uid, 0),sizeof(hd->uname)); + l_strncpy(hd->gname,name_gid(arcn->sb.st_gid, 0),sizeof(hd->gname)); + + /* + * calculate and store the checksum write the header to the archive + * return 0 tells the caller to now write the file data, 1 says no data + * needs to be written + */ + if (ul_oct(tar_chksm(hdblk, sizeof(HD_USTAR)), hd->chksum, + sizeof(hd->chksum), 3)) + goto out; + if (wr_rdbuf(hdblk, sizeof(HD_USTAR)) < 0) + return(-1); + if (wr_skip((off_t)(BLKMULT - sizeof(HD_USTAR))) < 0) + return(-1); + if ((arcn->type == PAX_CTG) || (arcn->type == PAX_REG)) + return(0); + return(1); + + out: + /* + * header field is out of range + */ + paxwarn(1, "Ustar header field is too small for %s", arcn->org_name); + return(1); +} + +/* + * name_split() + * see if the name has to be split for storage in a ustar header. We try + * to fit the entire name in the name field without splitting if we can. + * The split point is always at a / + * Return + * character pointer to split point (always the / that is to be removed + * if the split is not needed, the points is set to the start of the file + * name (it would violate the spec to split there). A NULL is returned if + * the file name is too long + */ + +static char * +name_split(char *name, int len) +{ + char *start; + + /* + * check to see if the file name is small enough to fit in the name + * field. if so just return a pointer to the name. + */ + if (len <= TNMSZ) + return(name); + if (len > (TPFSZ + TNMSZ + 1)) + return(NULL); + + /* + * we start looking at the biggest sized piece that fits in the name + * field. We walk forward looking for a slash to split at. The idea is + * to find the biggest piece to fit in the name field (or the smallest + * prefix we can find) + */ + start = name + len - TNMSZ - 1; + while ((*start != '\0') && (*start != '/')) + ++start; + + /* + * if we hit the end of the string, this name cannot be split, so we + * cannot store this file. + */ + if (*start == '\0') + return(NULL); + len = start - name; + + /* + * NOTE: /str where the length of str == TNMSZ can not be stored under + * the p1003.1-1990 spec for ustar. We could force a prefix of / and + * the file would then expand on extract to //str. The len == 0 below + * makes this special case follow the spec to the letter. + */ + if ((len > TPFSZ) || (len == 0)) + return(NULL); + + /* + * ok have a split point, return it to the caller + */ + return(start); +} diff --git a/commands/pax/tar.h b/commands/pax/tar.h new file mode 100644 index 000000000..ae767ba2a --- /dev/null +++ b/commands/pax/tar.h @@ -0,0 +1,145 @@ +/*- + * Copyright (c) 1992 Keith Muller. + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Keith Muller of the University of California, San Diego. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)tar.h 8.2 (Berkeley) 4/18/94 + * $FreeBSD: src/bin/pax/tar.h,v 1.7 2004/04/06 20:06:49 markm Exp $ + */ + +/* + * defines and data structures common to all tar formats + */ +#define CHK_LEN 8 /* length of checksum field */ +#define TNMSZ 100 /* size of name field */ +#ifdef _PAX_ +#define NULLCNT 2 /* number of null blocks in trailer */ +#define CHK_OFFSET 148 /* start of checksum field */ +#define BLNKSUM 256L /* sum of checksum field using ' ' */ +#endif /* _PAX_ */ + +/* + * Values used in typeflag field in all tar formats + * (only REGTYPE, LNKTYPE and SYMTYPE are used in old BSD tar headers) + */ +#define REGTYPE '0' /* Regular File */ +#define AREGTYPE '\0' /* Regular File */ +#define LNKTYPE '1' /* Link */ +#define SYMTYPE '2' /* Symlink */ +#define CHRTYPE '3' /* Character Special File */ +#define BLKTYPE '4' /* Block Special File */ +#define DIRTYPE '5' /* Directory */ +#define FIFOTYPE '6' /* FIFO */ +#define CONTTYPE '7' /* high perf file */ + +/* + * Mode field encoding of the different file types - values in octal + */ +#define TSUID 04000 /* Set UID on execution */ +#define TSGID 02000 /* Set GID on execution */ +#define TSVTX 01000 /* Reserved */ +#define TUREAD 00400 /* Read by owner */ +#define TUWRITE 00200 /* Write by owner */ +#define TUEXEC 00100 /* Execute/Search by owner */ +#define TGREAD 00040 /* Read by group */ +#define TGWRITE 00020 /* Write by group */ +#define TGEXEC 00010 /* Execute/Search by group */ +#define TOREAD 00004 /* Read by other */ +#define TOWRITE 00002 /* Write by other */ +#define TOEXEC 00001 /* Execute/Search by other */ + +#ifdef _PAX_ +/* + * Pad with a bit mask, much faster than doing a mod but only works on powers + * of 2. Macro below is for block of 512 bytes. + */ +#define TAR_PAD(x) ((512 - ((x) & 511)) & 511) +#endif /* _PAX_ */ + +/* + * structure of an old tar header as it appeared in BSD releases + */ +typedef struct { + char name[TNMSZ]; /* name of entry */ + char mode[8]; /* mode */ + char uid[8]; /* uid */ + char gid[8]; /* gid */ + char size[12]; /* size */ + char mtime[12]; /* modification time */ + char chksum[CHK_LEN]; /* checksum */ + char linkflag; /* norm, hard, or sym. */ + char linkname[TNMSZ]; /* linked to name */ +} HD_TAR; + +#ifdef _PAX_ +/* + * -o options for BSD tar to not write directories to the archive + */ +#define TAR_NODIR "nodir" +#define TAR_OPTION "write_opt" + +/* + * default device names + */ +#define DEV_0 "/dev/rmt0" +#define DEV_1 "/dev/rmt1" +#define DEV_4 "/dev/rmt4" +#define DEV_5 "/dev/rmt5" +#define DEV_7 "/dev/rmt7" +#define DEV_8 "/dev/rmt8" +#endif /* _PAX_ */ + +/* + * Data Interchange Format - Extended tar header format - POSIX 1003.1-1990 + */ +#define TPFSZ 155 +#define TMAGIC "ustar" /* ustar and a null */ +#define TMAGLEN 6 +#define TVERSION "00" /* 00 and no null */ +#define TVERSLEN 2 + +typedef struct { + char name[TNMSZ]; /* name of entry */ + char mode[8]; /* mode */ + char uid[8]; /* uid */ + char gid[8]; /* gid */ + char size[12]; /* size */ + char mtime[12]; /* modification time */ + char chksum[CHK_LEN]; /* checksum */ + char typeflag; /* type of file. */ + char linkname[TNMSZ]; /* linked to name */ + char magic[TMAGLEN]; /* magic cookie */ + char version[TVERSLEN]; /* version */ + char uname[32]; /* ascii owner name */ + char gname[32]; /* ascii group name */ + char devmajor[8]; /* major device number */ + char devminor[8]; /* minor device number */ + char prefix[TPFSZ]; /* linked to name */ +} HD_USTAR; diff --git a/commands/pax/tty_subs.c b/commands/pax/tty_subs.c new file mode 100644 index 000000000..3a3565440 --- /dev/null +++ b/commands/pax/tty_subs.c @@ -0,0 +1,189 @@ +/*- + * Copyright (c) 1992 Keith Muller. + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Keith Muller of the University of California, San Diego. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef lint +#if 0 +static char sccsid[] = "@(#)tty_subs.c 8.2 (Berkeley) 4/18/94"; +#endif +#endif /* not lint */ + +#include +#include +#include +#include +#include +#include +#include +#include "pax.h" +#include "extern.h" +#include + +/* + * routines that deal with I/O to and from the user + */ + +#define DEVTTY "/dev/tty" /* device for interactive i/o */ +static FILE *ttyoutf = NULL; /* output pointing at control tty */ +static FILE *ttyinf = NULL; /* input pointing at control tty */ + +/* + * tty_init() + * try to open the controlling terminal (if any) for this process. if the + * open fails, future ops that require user input will get an EOF + */ + +int +tty_init(void) +{ + int ttyfd; + + if ((ttyfd = open(DEVTTY, O_RDWR)) >= 0) { + if ((ttyoutf = fdopen(ttyfd, "w")) != NULL) { + if ((ttyinf = fdopen(ttyfd, "r")) != NULL) + return(0); + (void)fclose(ttyoutf); + } + (void)close(ttyfd); + } + + if (iflag) { + paxwarn(1, "Fatal error, cannot open %s", DEVTTY); + return(-1); + } + return(0); +} + +/* + * tty_prnt() + * print a message using the specified format to the controlling tty + * if there is no controlling terminal, just return. + */ + +void +tty_prnt(const char *fmt, ...) +{ + va_list ap; + if (ttyoutf == NULL) + return; + va_start(ap, fmt); + (void)vfprintf(ttyoutf, fmt, ap); + va_end(ap); + (void)fflush(ttyoutf); +} + +/* + * tty_read() + * read a string from the controlling terminal if it is open into the + * supplied buffer + * Return: + * 0 if data was read, -1 otherwise. + */ + +int +tty_read(char *str, int len) +{ + char *pt; + + if ((--len <= 0) || (ttyinf == NULL) || (fgets(str,len,ttyinf) == NULL)) + return(-1); + *(str + len) = '\0'; + + /* + * strip off that trailing newline + */ + if ((pt = strchr(str, '\n')) != NULL) + *pt = '\0'; + return(0); +} + +/* + * paxwarn() + * write a warning message to stderr. if "set" the exit value of pax + * will be non-zero. + */ + +void +paxwarn(int set, const char *fmt, ...) +{ + va_list ap; + va_start(ap, fmt); + if (set) + exit_val = 1; + /* + * when vflag we better ship out an extra \n to get this message on a + * line by itself + */ + if (vflag && vfpart) { + (void)fflush(listf); + (void)fputc('\n', stderr); + vfpart = 0; + } + (void)fprintf(stderr, "%s: ", argv0); + (void)vfprintf(stderr, fmt, ap); + va_end(ap); + (void)fputc('\n', stderr); +} + +/* + * syswarn() + * write a warning message to stderr. if "set" the exit value of pax + * will be non-zero. + */ + +void +syswarn(int set, int errnum, const char *fmt, ...) +{ + va_list ap; + va_start(ap, fmt); + if (set) + exit_val = 1; + /* + * when vflag we better ship out an extra \n to get this message on a + * line by itself + */ + if (vflag && vfpart) { + (void)fflush(listf); + (void)fputc('\n', stderr); + vfpart = 0; + } + (void)fprintf(stderr, "%s: ", argv0); + (void)vfprintf(stderr, fmt, ap); + va_end(ap); + + /* + * format and print the errno + */ + if (errnum > 0) + (void)fprintf(stderr, " <%s>", strerror(errnum)); + (void)fputc('\n', stderr); +}