From 30eeed2c01ebf515c83ba89332f6b0e64670c011 Mon Sep 17 00:00:00 2001 From: Thomas Cort Date: Sun, 27 Oct 2013 13:28:00 -0400 Subject: [PATCH] Importing usr.bin/split Replaces commands/split. No Minix-specific changes needed. Change-Id: I3c6551cf52d4c43eea0e8005b9cae83e17271f2d --- commands/Makefile | 2 +- commands/split/Makefile | 4 - commands/split/split.c | 117 ------------- man/man1/Makefile | 2 +- man/man1/split.1 | 36 ---- releasetools/nbsd_ports | 1 + usr.bin/Makefile | 2 +- usr.bin/split/Makefile | 6 + usr.bin/split/split.1 | 132 +++++++++++++++ usr.bin/split/split.c | 361 ++++++++++++++++++++++++++++++++++++++++ 10 files changed, 503 insertions(+), 160 deletions(-) delete mode 100644 commands/split/Makefile delete mode 100644 commands/split/split.c delete mode 100644 man/man1/split.1 create mode 100644 usr.bin/split/Makefile create mode 100644 usr.bin/split/split.1 create mode 100644 usr.bin/split/split.c diff --git a/commands/Makefile b/commands/Makefile index bb64fec91..abd09ec4e 100644 --- a/commands/Makefile +++ b/commands/Makefile @@ -23,7 +23,7 @@ SUBDIR= add_route arp ash at backup btrace \ ramdisk rarpd rawspeed rcp readclock \ reboot remsync rget rlogin \ rotate rsh rshd service setup shar \ - sleep slip spell split sprofalyze sprofdiff srccrc \ + sleep slip spell sprofalyze sprofdiff srccrc \ stty svclog svrctl swifi synctree sysenv \ syslogd tail tcpd tcpdp tcpstat telnet \ telnetd term termcap tget time touch \ diff --git a/commands/split/Makefile b/commands/split/Makefile deleted file mode 100644 index 0ba292949..000000000 --- a/commands/split/Makefile +++ /dev/null @@ -1,4 +0,0 @@ -PROG= split -MAN= - -.include diff --git a/commands/split/split.c b/commands/split/split.c deleted file mode 100644 index 1fd14aecb..000000000 --- a/commands/split/split.c +++ /dev/null @@ -1,117 +0,0 @@ -/* split - split a file Author: Michiel Huisjes */ - -#include -#include -#include -#include -#include -#include - -#define CHUNK_SIZE 1024 - -int cut_line = 
1000; -int infile; -char out_file[100]; -char *suffix; - -int main(int argc, char **argv); -void split(void); -int newfile(void); -void usage(void); -void quit(void); - -int main(argc, argv) -int argc; -char **argv; -{ - unsigned short i; - - out_file[0] = 'x'; - infile = -1; - - if (argc > 4) usage(); - for (i = 1; i < argc; i++) { - if (argv[i][0] == '-') { - if (argv[i][1] >= '0' && argv[i][1] <= '9' - && cut_line == 1000) - cut_line = atoi(argv[i]); - else if (argv[i][1] == '\0' && infile == -1) - infile = 0; - else - usage(); - } else if (infile == -1) { - if ((infile = open(argv[i], O_RDONLY)) < 0) { - std_err("Cannot open input file.\n"); - exit(1); - } - } else - strcpy(out_file, argv[i]); - } - if (infile == -1) infile = 0; - strcat(out_file, "aa"); - for (suffix = out_file; *suffix; suffix++); - suffix--; - -/* Appendix now points to last `a' of "aa". We have to decrement it by one */ - *suffix = 'a' - 1; - split(); - return(0); -} - -void split() -{ - char buf[CHUNK_SIZE]; - register char *index, *base; - register int n; - int fd; - long lines = 0L; - - fd = -1; - while ((n = read(infile, buf, CHUNK_SIZE)) > 0) { - base = index = buf; - while (--n >= 0) { - if (*index++ == '\n') - if (++lines % cut_line == 0) { - if (fd == -1) fd = newfile(); - if (write(fd, base, (int) (index - base)) != (int) (index - base)) - quit(); - base = index; - close(fd); - fd = -1; - } - } - if (index == base) continue; - if (fd == -1) fd = newfile(); - if (write(fd, base, (int) (index - base)) != (int) (index - base)) - quit(); - } -} - -int newfile() -{ - int fd; - - if (++*suffix > 'z') { /* Increment letter */ - *suffix = 'a'; /* Reset last letter */ - ++*(suffix - 1); /* Previous letter must be incremented */ - /* E.g. 
was `filename.az' */ - /* Now `filename.ba' */ - } - if ((fd = creat(out_file, 0644)) < 0) { - std_err("Cannot create new file.\n"); - exit(2); - } - return fd; -} - -void usage() -{ - std_err("Usage: split [-n] [file [name]].\n"); - exit(1); -} - -void quit() -{ - std_err("split: write error\n"); - exit(1); -} diff --git a/man/man1/Makefile b/man/man1/Makefile index dcd7a151f..1f9bce7b3 100644 --- a/man/man1/Makefile +++ b/man/man1/Makefile @@ -16,7 +16,7 @@ MAN= ash.1 at.1 \ profile.1 ps.1 rcp.1 recwave.1 \ remsync.1 rget.1 rlogin.1 rsh.1 rz.1 \ shar.1 sleep.1 spell.1 \ - split.1 stty.1 svc.1 svrctl.1 \ + stty.1 svc.1 svrctl.1 \ synctree.1 sysenv.1 sz.1 tail.1 telnet.1 template.1 \ term.1 termcap.1 tget.1 time.1 true.1 \ truncate.1 tty.1 umount.1 uname.1 \ diff --git a/man/man1/split.1 b/man/man1/split.1 deleted file mode 100644 index 54f423a88..000000000 --- a/man/man1/split.1 +++ /dev/null @@ -1,36 +0,0 @@ -.TH SPLIT 1 -.SH NAME -split \- split a large file into several smaller files -.SH SYNOPSIS -\fBsplit\fR [\fB\-\fIn\fR]\fR [\fIfile \fR[\fIprefix\fR]\fR]\fR -.br -.de FL -.TP -\\fB\\$1\\fR -\\$2 -.. -.de EX -.TP 20 -\\fB\\$1\\fR -# \\$2 -.. -.SH OPTIONS -.TP 5 -.B \-\fIn\fP -# Number of lines per piece (default: 1000) -.SH EXAMPLES -.TP 20 -.B split \-200 file -# Split \fIfile\fP into pieces of 200 lines each -.TP 20 -.B split file z -# Split \fIfile\fP into \fIzaa\fP, \fIzab\fP, etc. -.SH DESCRIPTION -.PP -.I Split -reads \fIfile\fP and writes it out in \fIn\fP-line pieces. -By default, the pieces are called \fIxaa\fP, \fIxab\fP, etc. -The optional second argument can be used to provide an alternative -prefix for the output file names. -.SH "SEE ALSO" -.BR cat (1). 
diff --git a/releasetools/nbsd_ports b/releasetools/nbsd_ports index db13255a1..4d026042b 100644 --- a/releasetools/nbsd_ports +++ b/releasetools/nbsd_ports @@ -202,6 +202,7 @@ 2010/05/27 08:40:19,usr.bin/seq 2013/06/02 12:00:00,usr.bin/shuffle 2012/10/17 12:00:00,usr.bin/sort +2012/10/17 12:00:00,usr.bin/split 2011/01/15 22:54:10,usr.bin/stat 2012/02/10 16:16:12,usr.bin/su 2013/10/06 12:00:00,usr.bin/tee diff --git a/usr.bin/Makefile b/usr.bin/Makefile index eb3679934..a058a0597 100644 --- a/usr.bin/Makefile +++ b/usr.bin/Makefile @@ -24,7 +24,7 @@ SUBDIR= asa \ renice rev \ \ shuffle sed seq \ - sort stat su \ + sort split stat su \ tee tic tput \ tr tsort unexpand \ toproto \ diff --git a/usr.bin/split/Makefile b/usr.bin/split/Makefile new file mode 100644 index 000000000..254d44b90 --- /dev/null +++ b/usr.bin/split/Makefile @@ -0,0 +1,6 @@ +# $NetBSD: Makefile,v 1.6 2009/04/14 22:15:26 lukem Exp $ +# @(#)Makefile 8.1 (Berkeley) 6/6/93 + +PROG= split + +.include diff --git a/usr.bin/split/split.1 b/usr.bin/split/split.1 new file mode 100644 index 000000000..a25e3c6ea --- /dev/null +++ b/usr.bin/split/split.1 @@ -0,0 +1,132 @@ +.\" $NetBSD: split.1,v 1.15 2007/05/31 01:35:35 jschauma Exp $ +.\" +.\" Copyright (c) 1990, 1991, 1993, 1994 +.\" The Regents of the University of California. All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" 3. 
Neither the name of the University nor the names of its contributors +.\" may be used to endorse or promote products derived from this software +.\" without specific prior written permission. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. +.\" +.\" @(#)split.1 8.3 (Berkeley) 4/16/94 +.\" +.Dd May 28, 2007 +.Dt SPLIT 1 +.Os +.Sh NAME +.Nm split +.Nd split a file into pieces +.Sh SYNOPSIS +.Nm +.Op Fl a Ar suffix_length +.Oo +.Fl b Ar byte_count Ns Oo Li k|m Oc | +.Fl l Ar line_count +.Fl n Ar chunk_count +.Oc +.Op Ar file Op Ar name +.Sh DESCRIPTION +The +.Nm +utility reads the given +.Ar file +and breaks it up into files of 1000 lines each. +If +.Ar file +is a single dash or absent, +.Nm +reads from the standard input. +.Ar file +itself is not altered. +.Pp +The options are as follows: +.Bl -tag -width Ds +.It Fl a +Use +.Ar suffix_length +letters to form the suffix of the file name. +.It Fl b +Create smaller files +.Ar byte_count +bytes in length. +If +.Ql k +is appended to the number, the file is split into +.Ar byte_count +kilobyte pieces. +If +.Ql m +is appended to the number, the file is split into +.Ar byte_count +megabyte pieces. +.It Fl l +Create smaller files +.Ar line_count +lines in length. 
+.It Fl n +Split file into +.Ar chunk_count +smaller files. +.El +.Pp +If additional arguments are specified, the first is used as the name +of the input file which is to be split. +If a second additional argument is specified, it is used as a prefix +for the names of the files into which the file is split. +In this case, each file into which the file is split is named by the +prefix followed by a lexically ordered suffix using +.Ar suffix_length +characters in the range +.Dq Li a-z . +If +.Fl a +is not specified, two letters are used as the suffix. +.Pp +If the +.Ar name +argument is not specified, +.Ql x +is used. +.Sh STANDARDS +The +.Nm +utility conforms to +.St -p1003.1-2001 . +.Sh HISTORY +A +.Nm +command appeared in +.At v6 . +.Pp +The +.Fl a +option was introduced in +.Nx 2.0 . +Before that, if +.Ar name +was not specified, +.Nm +would vary the first letter of the filename +to increase the number of possible output files. +The +.Fl a +option makes this unnecessary. diff --git a/usr.bin/split/split.c b/usr.bin/split/split.c new file mode 100644 index 000000000..d8f6665c4 --- /dev/null +++ b/usr.bin/split/split.c @@ -0,0 +1,361 @@ +/* $NetBSD: split.c,v 1.26 2011/09/16 15:39:29 joerg Exp $ */ + +/* + * Copyright (c) 1987, 1993, 1994 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. 
Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +#ifndef lint +__COPYRIGHT("@(#) Copyright (c) 1987, 1993, 1994\ + The Regents of the University of California. All rights reserved."); +#endif /* not lint */ + +#ifndef lint +#if 0 +static char sccsid[] = "@(#)split.c 8.3 (Berkeley) 4/25/94"; +#endif +__RCSID("$NetBSD: split.c,v 1.26 2011/09/16 15:39:29 joerg Exp $"); +#endif /* not lint */ + +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#define DEFLINE 1000 /* Default num lines per file. */ + +static int file_open; /* If a file open. */ +static int ifd = STDIN_FILENO, ofd = -1; /* Input/output file descriptors. */ +static char *fname; /* File name prefix. */ +static size_t sfxlen = 2; /* suffix length. 
*/ + +static void newfile(void); +static void split1(off_t, int) __dead; +static void split2(off_t) __dead; +static void split3(off_t) __dead; +static void usage(void) __dead; +static size_t bigwrite(int, void const *, size_t); + /* main -- Parse the options, open the input, build the output name prefix, then hand off to split1 (byte count), split3 (chunk count) or split2 (line count). */ +int +main(int argc, char *argv[]) +{ + int ch; + char *ep, *p; + char const *base; + off_t bytecnt = 0; /* Byte count to split on. */ + off_t numlines = 0; /* Line count to split on. */ + off_t chunks = 0; /* Number of chunks to split into. */ + + while ((ch = getopt(argc, argv, "0123456789b:l:a:n:")) != -1) + switch (ch) { + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + /* + * Undocumented kludge: split was originally designed + * to take a number after a dash. Only the first digit option parses a count; a digit option seen while numlines is already nonzero is ignored. + */ + if (numlines == 0) { + p = argv[optind - 1]; + if (p[0] == '-' && p[1] == ch && !p[2]) + p++; + else + p = argv[optind] + 1; + numlines = strtoull(p, &ep, 10); + if (numlines == 0 || *ep != '\0') + errx(1, "%s: illegal line count.", p); + } + break; + case 'b': /* Byte count; a trailing k multiplies it by 1024, a trailing m by 1024 * 1024. */ + if (!isdigit((unsigned char)optarg[0]) || + (bytecnt = strtoull(optarg, &ep, 10)) == 0 || + (*ep != '\0' && *ep != 'k' && *ep != 'm')) + errx(1, "%s: illegal byte count.", optarg); + if (*ep == 'k') + bytecnt *= 1024; + else if (*ep == 'm') + bytecnt *= 1024 * 1024; + break; + case 'l': /* Line count. */ + if (numlines != 0) + usage(); + if (!isdigit((unsigned char)optarg[0]) || + (numlines = strtoull(optarg, &ep, 10)) == 0 || + *ep != '\0') + errx(1, "%s: illegal line count.", optarg); + break; + case 'a': /* Suffix length. */ + if (!isdigit((unsigned char)optarg[0]) || + (sfxlen = (size_t)strtoul(optarg, &ep, 10)) == 0 || + *ep != '\0') + errx(1, "%s: illegal suffix length.", optarg); + break; + case 'n': /* Chunks. 
*/ + if (!isdigit((unsigned char)optarg[0]) || + (chunks = (size_t)strtoul(optarg, &ep, 10)) == 0 || + *ep != '\0') + errx(1, "%s: illegal number of chunks.", optarg); + break; + default: + usage(); + } + argv += optind; + argc -= optind; + /* The first operand names the input file; a lone dash leaves ifd at its STDIN_FILENO initializer. */ + if (*argv != NULL) { + if (strcmp(*argv, "-") != 0 && + (ifd = open(*argv, O_RDONLY, 0)) < 0) + err(1, "%s", *argv); + ++argv; + } + + /* The second operand is the output prefix (default x); allocate room for it plus sfxlen suffix characters and the terminating NUL. */ + base = (*argv != NULL) ? *argv++ : "x"; + if ((fname = malloc(strlen(base) + sfxlen + 1)) == NULL) + err(EXIT_FAILURE, NULL); + (void)strcpy(fname, base); /* File name prefix. */ + + if (*argv != NULL) + usage(); + /* A line count cannot be combined with a byte count or a chunk count, and those two exclude each other. */ + if (numlines == 0) + numlines = DEFLINE; + else if (bytecnt || chunks) + usage(); + + if (bytecnt && chunks) + usage(); + + if (bytecnt) + split1(bytecnt, 0); + else if (chunks) + split3(chunks); + else + split2(numlines); + + return 0; +} + +/* + * split1 -- + * Split the input by bytes. bcnt counts the bytes written since the last file boundary. When maxcnt is nonzero at most maxcnt files are opened and all remaining data goes to the last one. Does not return: exits 0 at end of input. + */ +static void +split1(off_t bytecnt, int maxcnt) +{ + off_t bcnt; + ssize_t dist, len; + char *C; + char bfr[MAXBSIZE]; + int nfiles; + + nfiles = 0; + + for (bcnt = 0;;) + switch (len = read(ifd, bfr, MAXBSIZE)) { + case 0: + exit(0); + /* NOTREACHED */ + case -1: + err(1, "read"); + /* NOTREACHED */ + default: + if (!file_open) { + if (!maxcnt || (nfiles < maxcnt)) { + newfile(); + nfiles++; + file_open = 1; + } + } + if (bcnt + len >= bytecnt) { + /* LINTED: bytecnt - bcnt <= len */ + dist = bytecnt - bcnt; + if (bigwrite(ofd, bfr, dist) != (size_t)dist) + err(1, "write"); + len -= dist; + for (C = bfr + dist; len >= bytecnt; + /* LINTED: bytecnt <= len */ + len -= bytecnt, C += bytecnt) { + if (!maxcnt || (nfiles < maxcnt)) { + newfile(); + nfiles++; + } + /* LINTED: as above */ + if (bigwrite(ofd, + C, bytecnt) != (size_t)bytecnt) + err(1, "write"); + } + if (len) { + if (!maxcnt || (nfiles < maxcnt)) { + newfile(); + nfiles++; + } + /* LINTED: len >= 0 */ + if (bigwrite(ofd, C, len) != (size_t)len) + err(1, "write"); + } else + file_open = 0; + bcnt = len; + } else { + bcnt += len; + 
/* LINTED: len >= 0 */ + if (bigwrite(ofd, bfr, len) != (size_t)len) + err(1, "write"); + } + } +} + +/* + * split2 -- + * Split the input by lines. Every numlines-th newline ends the current output file; the next file is opened only once more data is available, so no empty trailing file is created. Does not return: exits 0 at end of input. + */ +static void +split2(off_t numlines) +{ + off_t lcnt; + size_t bcnt; + ssize_t len; + char *Ce, *Cs; + char bfr[MAXBSIZE]; + + for (lcnt = 0;;) + switch (len = read(ifd, bfr, MAXBSIZE)) { + case 0: + exit(0); + /* NOTREACHED */ + case -1: + err(1, "read"); + /* NOTREACHED */ + default: + if (!file_open) { + newfile(); + file_open = 1; + } + for (Cs = Ce = bfr; len--; Ce++) + if (*Ce == '\n' && ++lcnt == numlines) { + bcnt = Ce - Cs + 1; + if (bigwrite(ofd, Cs, bcnt) != (size_t)bcnt) + err(1, "write"); + lcnt = 0; + Cs = Ce + 1; + if (len) + newfile(); + else + file_open = 0; + } + if (Cs < Ce) { /* Flush the bytes that follow the last file boundary found in this buffer. */ + bcnt = Ce - Cs; + if (bigwrite(ofd, Cs, bcnt) != (size_t)bcnt) + err(1, "write"); + } + } +} + +/* + * split3 -- + * Split the input into specified number of chunks. The input size is taken from fstat and each chunk is st_size / chunks bytes (integer division); split1 is told to open at most chunks files. + */ +static void +split3(off_t chunks) +{ + struct stat sb; + + if (fstat(ifd, &sb) == -1) { + err(1, "stat"); + /* NOTREACHED */ + } + + if (chunks > sb.st_size) { + errx(1, "can't split into more than %d files", + (int)sb.st_size); + /* NOTREACHED */ + } + + split1(sb.st_size/chunks, chunks); +} + +/* + * newfile -- + * Open a new output file. 
+ */ +static void +newfile(void) +{ + static int fnum; + static char *fpnt; + int quot, i; + + if (ofd == -1) { /* First call: remember where the suffix starts and terminate the name sfxlen characters after it. */ + fpnt = fname + strlen(fname); + fpnt[sfxlen] = '\0'; + } else if (close(ofd) != 0) + err(1, "%s", fname); + /* Render fnum as sfxlen base-26 digits using the letters a to z, least significant digit last. */ + quot = fnum; + for (i = sfxlen - 1; i >= 0; i--) { + fpnt[i] = quot % 26 + 'a'; + quot = quot / 26; + } + if (quot > 0) /* fnum does not fit in sfxlen letters. */ + errx(1, "too many files."); + ++fnum; + if ((ofd = open(fname, O_WRONLY | O_CREAT | O_TRUNC, DEFFILEMODE)) < 0) + err(1, "%s", fname); +} + /* bigwrite -- Call write repeatedly until len bytes have been written. Returns the number of bytes written, which is less than len only when write returned -1. */ +static size_t +bigwrite(int fd, const void *buf, size_t len) +{ + const char *ptr = buf; + size_t sofar = 0; + ssize_t w; + + while (len != 0) { + if ((w = write(fd, ptr, len)) == -1) + return sofar; + len -= w; + ptr += w; + sofar += w; + } + return sofar; +} + + /* usage -- Print the synopsis to stderr and exit with status 1. */ +static void +usage(void) +{ + (void)fprintf(stderr, +"usage: %s [-b byte_count] [-l line_count] [-n chunk_count] [-a suffix_length] " +"[file [prefix]]\n", getprogname()); + exit(1); +} -- 2.44.0