From ff26d9a4ff9e3704d24e5d3a43a6c27f34c14b48 Mon Sep 17 00:00:00 2001 From: Ben Gras Date: Tue, 22 Jun 2010 21:20:54 +0000 Subject: [PATCH] original netbsd printf, cut --- commands/cut/Makefile | 10 + commands/cut/cut.1 | 130 ++++++++ commands/cut/cut.c | 302 +++++++++++++++++ commands/cut/x_cut.c | 95 ++++++ commands/printf/Makefile | 7 + commands/printf/printf.1 | 412 +++++++++++++++++++++++ commands/printf/printf.c | 693 +++++++++++++++++++++++++++++++++++++++ 7 files changed, 1649 insertions(+) create mode 100644 commands/cut/Makefile create mode 100644 commands/cut/cut.1 create mode 100644 commands/cut/cut.c create mode 100644 commands/cut/x_cut.c create mode 100644 commands/printf/Makefile create mode 100644 commands/printf/printf.1 create mode 100644 commands/printf/printf.c diff --git a/commands/cut/Makefile b/commands/cut/Makefile new file mode 100644 index 000000000..1fa210486 --- /dev/null +++ b/commands/cut/Makefile @@ -0,0 +1,10 @@ +# $NetBSD: Makefile,v 1.5 2007/07/02 18:41:03 christos Exp $ +# @(#)Makefile 8.1 (Berkeley) 6/6/93 + +WARNS?= 4 +PROG= cut + +LDADD+= -lutil +DPADD+= ${LIBUTIL} + +.include diff --git a/commands/cut/cut.1 b/commands/cut/cut.1 new file mode 100644 index 000000000..e5827c913 --- /dev/null +++ b/commands/cut/cut.1 @@ -0,0 +1,130 @@ +.\" $NetBSD: cut.1,v 1.14.26.1 2008/12/23 03:47:20 snj Exp $ +.\" +.\" Copyright (c) 1989, 1990, 1993 +.\" The Regents of the University of California. All rights reserved. +.\" +.\" This code is derived from software contributed to Berkeley by +.\" the Institute of Electrical and Electronics Engineers, Inc. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. 
Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" 3. Neither the name of the University nor the names of its contributors +.\" may be used to endorse or promote products derived from this software +.\" without specific prior written permission. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. +.\" +.\" @(#)cut.1 8.1 (Berkeley) 6/6/93 +.\" +.Dd December 21, 2008 +.Dt CUT 1 +.Os +.Sh NAME +.Nm cut +.Nd select portions of each line of a file +.Sh SYNOPSIS +.Nm +.Fl b Ar list +.Op Fl n +.Op Ar +.Nm +.Fl c Ar list +.Op Ar +.Nm +.Fl f Ar list +.Op Fl d Ar delim +.Op Fl s +.Op Ar +.Sh DESCRIPTION +The +.Nm +utility selects portions of each line (as specified by +.Ar list ) +from each +.Ar file +and writes them to the +standard output. +If the +.Ar file +argument is a single dash +.Pq Sq - +or no +.Ar file +arguments were specified, lines are read from the standard input. +The items specified by +.Ar list +can be in terms of column position or in terms of fields delimited +by a special character. +Column numbering starts from 1. 
+.Pp +.Ar List +is a comma or whitespace separated set of increasing numbers and/or +number ranges. +Number ranges consist of a number, a dash +.Pq Li \- , +and a second number +and select the fields or columns from the first number to the second, +inclusive. +Numbers or number ranges may be preceded by a dash, which selects all +fields or columns from 1 to the first number. +Numbers or number ranges may be followed by a dash, which selects all +fields or columns from the last number to the end of the line. +Numbers and number ranges may be repeated, overlapping, and in any order. +It is not an error to select fields or columns not present in the +input line. +.Pp +The options are as follows: +.Bl -tag -width Fl +.It Fl b Ar list +The +.Ar list +specifies byte positions. +.It Fl c Ar list +The +.Ar list +specifies character positions. +.It Fl d Ar string +Use the first character of +.Ar string +as the field delimiter character. +The default is the +.Aq TAB +character. +.It Fl f Ar list +The +.Ar list +specifies fields, separated by the field delimiter character. +The selected fields are output, +separated by the field delimiter character. +.It Fl n +Do not split multi-byte characters. +.It Fl s +Suppresses lines with no field delimiter characters. +Unless specified, lines with no delimiters are passed through unmodified. +.El +.Sh EXIT STATUS +.Nm +exits 0 on success, 1 if an error occurred. +.Sh SEE ALSO +.Xr paste 1 +.Sh STANDARDS +The +.Nm +utility conforms to +.St -p1003.2-92 . diff --git a/commands/cut/cut.c b/commands/cut/cut.c new file mode 100644 index 000000000..30eff8dfc --- /dev/null +++ b/commands/cut/cut.c @@ -0,0 +1,302 @@ +/* $NetBSD: cut.c,v 1.25 2008/07/21 14:19:22 lukem Exp $ */ + +/* + * Copyright (c) 1989, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Adam S. Moskowitz of Menlo Consulting and Marciano Pitargue. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +#ifndef lint +__COPYRIGHT("@(#) Copyright (c) 1989, 1993\ + The Regents of the University of California. 
All rights reserved.");
+#endif /* not lint */
+
+#ifndef lint
+#if 0
+static char sccsid[] = "@(#)cut.c 8.3 (Berkeley) 5/4/95";
+#endif
+__RCSID("$NetBSD: cut.c,v 1.25 2008/07/21 14:19:22 lukem Exp $");
+#endif /* not lint */
+
+#include <ctype.h>
+#include <err.h>
+#include <errno.h>
+#include <limits.h>
+#include <locale.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <util.h>
+#include <wchar.h>
+#include <sys/param.h>
+
+static int bflag;
+static int cflag;
+static char dchar;
+static int dflag;
+static int fflag;
+static int sflag;
+
+static void b_cut(FILE *, const char *);
+static void c_cut(FILE *, const char *);
+static void f_cut(FILE *, const char *);
+static void get_list(char *);
+static void usage(void) __dead;
+
+/*
+ * Parse the options, pick the cutting routine that matches the selected
+ * mode (-b, -c or -f) and run it over every file operand, or over the
+ * standard input when there are no operands or the operand is "-".
+ * Exactly one of -b, -c and -f must be given; -d and -s are only valid
+ * together with -f.
+ */
+int
+main(int argc, char *argv[])
+{
+	FILE *fp;
+	void (*fcn)(FILE *, const char *);
+	int ch;
+
+	fcn = NULL;
+	(void)setlocale(LC_ALL, "");
+
+	dchar = '\t';			/* default delimiter is \t */
+
+	/* -b selects b_cut and -c selects c_cut (see x_cut.c);
+	   the -n option is accepted and ignored. */
+	while ((ch = getopt(argc, argv, "b:c:d:f:sn")) != -1)
+		switch(ch) {
+		case 'b':
+			fcn = b_cut;
+			get_list(optarg);
+			bflag = 1;
+			break;
+		case 'c':
+			fcn = c_cut;
+			get_list(optarg);
+			cflag = 1;
+			break;
+		case 'd':
+			dchar = *optarg;
+			dflag = 1;
+			break;
+		case 'f':
+			get_list(optarg);
+			fcn = f_cut;
+			fflag = 1;
+			break;
+		case 's':
+			sflag = 1;
+			break;
+		case 'n':
+			break;
+		case '?':
+		default:
+			usage();
+		}
+	argc -= optind;
+	argv += optind;
+
+	/* usage() does not return, so fcn is non-NULL past this point */
+	if (fflag) {
+		if (cflag || bflag)
+			usage();
+	} else if ((!cflag && !bflag) || dflag || sflag)
+		usage();
+	else if (bflag && cflag)
+		usage();
+
+	if (*argv)
+		for (; *argv; ++argv) {
+			if (strcmp(*argv, "-") == 0)
+				fcn(stdin, "stdin");
+			else {
+				if ((fp = fopen(*argv, "r")) == NULL)
+					err(1, "%s", *argv);
+				fcn(fp, *argv);
+				(void)fclose(fp);
+			}
+		}
+	else
+		fcn(stdin, "stdin");
+	return 0;
+}
+
+/*
+ * Selection state built by get_list() and read by the cut routines:
+ *  autostart	largest N of any "-N" item; positions 1..N are selected
+ *  autostop	smallest N of any "N-" item; when non-zero everything
+ *		from there to the end of the line is selected
+ *  maxval	highest position that has to be looked up in positions[]
+ */
+static size_t autostart, autostop, maxval;
+
+/* positions[i] is non-zero when 1-based column/field i is selected */
+static char *positions = NULL;
+static size_t numpositions = 0;
+#define ALLOC_CHUNK	_POSIX2_LINE_MAX	/* malloc granularity */
+
+/*
+ * Parse a -b/-c/-f list argument and record the selected positions in
+ * the positions[] array, growing it as required.  The argument string
+ * is modified (it is split with strtok).  Exits with an error message
+ * on a malformed item or on an item that names position zero.
+ */
+static void
+get_list(char *list)
+{
+	size_t setautostart, start, stop, need;
+	char *pos;
+	char *p;
+
+	if (positions == NULL) {
+		numpositions = ALLOC_CHUNK;
+		positions = ecalloc(numpositions, sizeof(*positions));
+	}
+
+	/*
+	 * set a byte in the positions array to indicate if a field or
+	 * column is to be selected; use +1, it's 1-based, not 0-based.
+	 * This parser is less restrictive than the Draft 9 POSIX spec.
+	 * POSIX doesn't allow lists that aren't in increasing order or
+	 * overlapping lists. We also handle "-3-5" although there's no
+	 * real reason to.
+	 */
+	for (; (p = strtok(list, ", \t")) != NULL; list = NULL) {
+		setautostart = start = stop = 0;
+		if (*p == '-') {
+			++p;
+			setautostart = 1;
+		}
+		if (isdigit((unsigned char)*p)) {
+			start = stop = strtol(p, &p, 10);
+			if (setautostart && start > autostart)
+				autostart = start;
+		}
+		if (*p == '-') {
+			if (isdigit((unsigned char)p[1]))
+				stop = strtol(p + 1, &p, 10);
+			if (*p == '-') {
+				++p;
+				if (!autostop || autostop > stop)
+					autostop = stop;
+			}
+		}
+		if (*p)
+			errx(1, "[-bcf] list: illegal list value");
+		if (!stop || !start)
+			errx(1, "[-bcf] list: values may not include zero");
+		/*
+		 * The array must cover start as well as stop: for an item
+		 * such as "-5000-3" start (and thus autostart) is larger
+		 * than stop, and the memset() below writes autostart bytes.
+		 */
+		need = (start > stop ? start : stop) + 1;
+		if (need > numpositions) {
+			size_t newsize;
+			newsize = roundup(need, ALLOC_CHUNK);
+			positions = erealloc(positions, newsize);
+			(void)memset(positions + numpositions, 0,
+			    newsize - numpositions);
+			numpositions = newsize;
+		}
+		if (maxval < stop)
+			maxval = stop;
+		for (pos = positions + start; start++ <= stop; pos++)
+			*pos = 1;
+	}
+
+	/* overlapping ranges */
+	if (autostop && maxval > autostop)
+		maxval = autostop;
+
+	/* set autostart */
+	if (autostart)
+		(void)memset(positions + 1, 1, autostart);
+}
+
+/*
+ * Field mode (-f): for every line of fp write the selected fields,
+ * separated by the delimiter character, to standard output.  Lines
+ * without a delimiter are copied unchanged unless -s was given, in
+ * which case they are dropped.  A final line lacking a newline is
+ * processed as if it ended with one.
+ */
+static void
+/*ARGSUSED*/
+f_cut(FILE *fp, const char *fname __unused)
+{
+	int ch, field, isdelim;
+	char *pos, *p, sep;
+	int output;
+	size_t len;
+	char *lbuf, *tbuf;
+
+	for (sep = dchar, tbuf = NULL; (lbuf = fgetln(fp, &len)) != NULL;) {
+		output = 0;
+		if (lbuf[len - 1] != '\n') {
+			/* no newline at the end of the last line so add one */
+			if ((tbuf = (char *)malloc(len + 1)) == NULL)
+				err(1, NULL);
+			(void)memcpy(tbuf, lbuf, len);
+			tbuf[len++] = '\n';
+			lbuf = tbuf;
+		}
+		/* first pass: does the line contain the delimiter at all? */
+		for (isdelim = 0, p = lbuf;; ++p) {
+			ch = *p;
+			/* this should work if newline is delimiter */
+			if (ch == sep)
+				isdelim = 1;
+			if (ch == '\n') {
+				if (!isdelim && !sflag)
+					(void)fwrite(lbuf, len, 1, stdout);
+				break;
+			}
+		}
+		if (!isdelim)
+			continue;
+
+		/* second pass: copy or skip one field per positions[] entry */
+		pos = positions + 1;
+		for (field = maxval, p = lbuf; field; --field, ++pos) {
+			if (*pos) {
+				if (output++)
+					(void)putchar(sep);
+				while ((ch = *p++) != '\n' && ch != sep)
+					(void)putchar(ch);
+			} else {
+				while ((ch = *p++) != '\n' && ch != sep)
+					continue;
+			}
+			if (ch == '\n')
+				break;
+		}
+		/* past maxval: the rest of the line is wanted only for "N-" */
+		if (ch != '\n') {
+			if (autostop) {
+				if (output)
+					(void)putchar(sep);
+				for (; (ch = *p) != '\n'; ++p)
+					(void)putchar(ch);
+			} else
+				for (; (ch = *p) != '\n'; ++p);
+		}
+		(void)putchar('\n');
+		if (tbuf) {
+			free(tbuf);
+			tbuf = NULL;
+		}
+	}
+	if (tbuf)
+		free(tbuf);
+}
+
+/* Print the synopsis on standard error and exit with status 1. */
+static void
+usage(void)
+{
+	(void)fprintf(stderr, "Usage:\tcut -b list [-n] [file ...]\n"
+	    "\tcut -c list [file1 ...]\n"
+	    "\tcut -f list [-d delim] [-s] [file ...]\n");
+	exit(1);
+}
+
+/* make b_cut(): */
+#define CUT_BYTE 1
+#include "x_cut.c"
+#undef CUT_BYTE
+
+/* make c_cut(): */
+#define CUT_BYTE 0
+#include "x_cut.c"
+#undef CUT_BYTE
diff --git a/commands/cut/x_cut.c b/commands/cut/x_cut.c
new file mode 100644
index 000000000..006b5afd6
--- /dev/null
+++ b/commands/cut/x_cut.c
@@ -0,0 +1,95 @@
+/* $NetBSD: x_cut.c,v 1.2 2007/07/02 18:41:04 christos Exp $ */
+
+/*
+ * Copyright (c) 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Adam S. Moskowitz of Menlo Consulting and Marciano Pitargue.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * This file is #include'd twice from cut.c, to generate both
+ * single- and multibyte versions of the same code.
+ *
+ * In cut.c #define:
+ * CUT_BYTE=1 to define b_cut (singlebyte), and
+ * CUT_BYTE=0 to define c_cut (multibyte).
+ *
+ */
+
+#if (CUT_BYTE == 1)
+# define CUT_FN b_cut
+# define CUT_CH_T int
+# define CUT_GETC getc
+# define CUT_EOF EOF
+# define CUT_PUTCHAR putchar
+#else
+# define CUT_FN c_cut
+# define CUT_CH_T wint_t
+# define CUT_GETC getwc
+# define CUT_EOF WEOF
+# define CUT_PUTCHAR putwchar
+#endif
+
+
+/*
+ * Column mode: read fp one character at a time (bytes for b_cut, wide
+ * characters for c_cut) and write the characters whose 1-based column
+ * is marked in positions[] to standard output, one output line per
+ * input line.  Columns beyond maxval are copied only when autostop is
+ * set (an open-ended "N-" item was given) and skipped otherwise.
+ * Returns when end of file is seen at the start of, or within, the
+ * first maxval columns of a line.
+ */
+/* ARGSUSED */
+void
+CUT_FN(FILE *fp, const char *fname __unused)
+{
+	CUT_CH_T ch;
+	size_t col;
+	char *pos;
+
+	ch = 0;
+	for (;;) {
+		pos = positions + 1;
+		for (col = maxval; col; --col) {
+			/* CUT_EOF, not EOF: ch is a wint_t in c_cut */
+			if ((ch = CUT_GETC(fp)) == CUT_EOF)
+				return;
+			if (ch == '\n')
+				break;
+			if (*pos++)
+				(void)CUT_PUTCHAR(ch);
+		}
+		/* line is longer than maxval columns: copy or drop the tail */
+		if (ch != '\n') {
+			if (autostop)
+				while ((ch = CUT_GETC(fp)) != CUT_EOF && ch != '\n')
+					(void)CUT_PUTCHAR(ch);
+			else
+				while ((ch = CUT_GETC(fp)) != CUT_EOF && ch != '\n');
+		}
+		(void)CUT_PUTCHAR('\n');
+	}
+}
+
+#undef CUT_FN
+#undef CUT_CH_T
+#undef CUT_GETC
+#undef CUT_EOF
+#undef CUT_PUTCHAR
+
diff --git a/commands/printf/Makefile b/commands/printf/Makefile
new file mode 100644
index 000000000..ec61ff2c5
--- /dev/null
+++ b/commands/printf/Makefile
@@ -0,0 +1,7 @@
+# $NetBSD: Makefile,v 1.9 2004/10/30 19:28:35 christos Exp $
+# from: @(#)Makefile 8.1 (Berkeley) 6/6/93
+
+PROG= printf
+WARNS= 3
+
+.include <bsd.prog.mk>
diff --git a/commands/printf/printf.1 b/commands/printf/printf.1
new file mode 100644
index 000000000..7f1aff2ec
--- /dev/null
+++ b/commands/printf/printf.1
@@ -0,0 +1,412 @@
+.\" $NetBSD: printf.1,v 1.22 2008/09/01 09:20:41 dholland Exp $
+.\"
+.\" Copyright (c) 1989, 1990, 1993
+.\" The Regents of the University of California. All rights reserved.
+.\"
+.\" This code is derived from software contributed to Berkeley by
+.\" the Institute of Electrical and Electronics Engineers, Inc.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1.
Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" 3. Neither the name of the University nor the names of its contributors +.\" may be used to endorse or promote products derived from this software +.\" without specific prior written permission. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. +.\" +.\" from: @(#)printf.1 8.1 (Berkeley) 6/6/93 +.\" +.Dd May 6, 2008 +.Dt PRINTF 1 +.Os +.Sh NAME +.Nm printf +.Nd formatted output +.Sh SYNOPSIS +.Nm +.Ar format +.Op Ar arguments ... +.Sh DESCRIPTION +.Nm +formats and prints its arguments, after the first, under control +of the +.Ar format . +The +.Ar format +is a character string which contains three types of objects: plain characters, +which are simply copied to standard output, character escape sequences which +are converted and copied to the standard output, and format specifications, +each of which causes printing of the next successive +.Ar argument . 
+.Pp +The +.Ar arguments +after the first are treated as strings if the corresponding format is +either +.Cm b , +.Cm B , +.Cm c , +or +.Cm s ; +otherwise it is evaluated as a C constant, with the following extensions: +.Pp +.Bl -bullet -offset indent -compact +.It +A leading plus or minus sign is allowed. +.It +If the leading character is a single or double quote, the value is the +.Tn ASCII +code of the next character. +.El +.Pp +The format string is reused as often as necessary to satisfy the +.Ar arguments . +Any extra format specifications are evaluated with zero or the null +string. +.Pp +Character escape sequences are in backslash notation as defined in +.St -ansiC . +The characters and their meanings are as follows: +.Bl -tag -width Ds -offset indent +.It Cm \ee +Write an +.Aq escape +character. +.It Cm \ea +Write a +.Aq bell +character. +.It Cm \eb +Write a +.Aq backspace +character. +.It Cm \ef +Write a +.Aq form-feed +character. +.It Cm \en +Write a +.Aq new-line +character. +.It Cm \er +Write a +.Aq carriage return +character. +.It Cm \et +Write a +.Aq tab +character. +.It Cm \ev +Write a +.Aq vertical tab +character. +.It Cm \e\' +Write a +.Aq single quote +character. +.It Cm \e" +Write a +.Aq double quote +character. +.It Cm \e\e +Write a backslash character. +.It Cm \e Ns Ar num +Write an 8\-bit character whose +.Tn ASCII +value is the 1\-, 2\-, or 3\-digit octal number +.Ar num . +.It Cm \ex Ns Ar xx +Write an 8\-bit character whose +.Tn ASCII +value is the 1\- or 2\-digit hexadecimal number +.Ar xx . +.El +.Pp +Each format specification is introduced by the percent character +.Pq Dq \&% . +The remainder of the format specification includes, +in the following order: +.Bl -tag -width Ds +.It Zero or more of the following flags : +.Bl -tag -width Ds +.It Cm # +A +.Sq # +character specifying that the value should be printed in an +.Dq alternative form . +For +.Cm b , +.Cm c , +.Cm d , +and +.Cm s +formats, this option has no effect. 
+For the +.Cm o +format the precision of the number is increased to force the first +character of the output string to a zero. +For the +.Cm x +.Pq Cm X +format, a non-zero result has the string +.Li 0x +.Pq Li 0X +prepended to it. +For +.Cm e , +.Cm E , +.Cm f , +.Cm g , +and +.Cm G +formats, the result will always contain a decimal point, even if no +digits follow the point (normally, a decimal point only appears in the +results of those formats if a digit follows the decimal point). +For +.Cm g +and +.Cm G +formats, trailing zeros are not removed from the result as they +would otherwise be. +.\" I turned this off - decided it isn't a valid use of '#' +.\" For the +.\" .Cm B +.\" format, backslash-escape sequences are expanded first; +.It Cm \&\- +A minus sign +.Sq \- +which specifies +.Em left adjustment +of the output in the indicated field; +.It Cm \&+ +A +.Sq \&+ +character specifying that there should always be +a sign placed before the number when using signed formats. +.It Sq \&\ \& +A space specifying that a blank should be left before a positive number +for a signed format. +A +.Sq \&+ +overrides a space if both are used; +.It Cm \&0 +A zero `0' character indicating that zero-padding should be used +rather than blank-padding. +A +.Sq \- +overrides a +.Sq \&0 +if both are used; +.El +.It Field Width : +An optional digit string specifying a +.Em field width ; +if the output string has fewer characters than the field width it will +be blank-padded on the left (or right, if the left-adjustment indicator +has been given) to make up the field width (note that a leading zero +is a flag, but an embedded zero is part of a field width); +.It Precision : +An optional period, +.Sq Cm \&. 
,
+followed by an optional digit string giving a
+.Em precision
+which specifies the number of digits to appear after the decimal point,
+for
+.Cm e
+and
+.Cm f
+formats, or the maximum number of characters to be printed
+from a string
+.Sm off
+.Pf ( Cm b ,
+.Sm on
+.Cm B ,
+and
+.Cm s
+formats); if the digit string is missing, the precision is treated
+as zero;
+.It Format :
+A character which indicates the type of format to use (one of
+.Cm diouxXfeEgGbBcs ) .
+.El
+.Pp
+A field width or precision may be
+.Sq Cm \&*
+instead of a digit string.
+In this case an
+.Ar argument
+supplies the field width or precision.
+.Pp
+The format characters and their meanings are:
+.Bl -tag -width Fl
+.It Cm diouXx
+The
+.Ar argument
+is printed as a signed decimal (d or i), unsigned octal, unsigned decimal,
+or unsigned hexadecimal (X or x), respectively.
+.It Cm f
+The
+.Ar argument
+is printed in the style
+.Sm off
+.Pf [\-]ddd Cm \&. No ddd
+.Sm on
+where the number of d's
+after the decimal point is equal to the precision specification for
+the argument.
+If the precision is missing, 6 digits are given; if the precision
+is explicitly 0, no digits and no decimal point are printed.
+.It Cm eE
+The
+.Ar argument
+is printed in the style
+.Sm off
+.Pf [\-]d Cm \&. No ddd Cm e No \\*(Pmdd
+.Sm on
+where there
+is one digit before the decimal point and the number after is equal to
+the precision specification for the argument; when the precision is
+missing, 6 digits are produced.
+An upper-case E is used for an
+.Sq E
+format.
+.It Cm gG
+The
+.Ar argument
+is printed in style
+.Cm f
+or in style
+.Cm e
+.Pq Cm E
+whichever gives full precision in minimum space.
+.It Cm b
+Characters from the string
+.Ar argument
+are printed with backslash-escape sequences expanded.
+.br +The following additional backslash-escape sequences are supported: +.Bl -tag -width Ds +.It Cm \ec +Causes +.Nm +to ignore any remaining characters in the string operand containing it, +any remaining string operands, and any additional characters in +the format operand. +.It Cm \e0 Ns Ar num +Write an 8\-bit character whose +.Tn ASCII +value is the 1\-, 2\-, or 3\-digit +octal number +.Ar num . +.It Cm \e^ Ns Ar c +Write the control character +.Ar c . +Generates characters `\e000' through `\e037`, and `\e177' (from `\e^?'). +.It Cm \eM\- Ns Ar c +Write the character +.Ar c +with the 8th bit set. +Generates characters `\e241' through `\e376`. +.It Cm \eM^ Ns Ar c +Write the control character +.Ar c +with the 8th bit set. +Generates characters `\e200' through `\e237`, and `\e377' (from `\eM^?'). +.El +.It Cm B +Characters from the string +.Ar argument +are printed with unprintable characters backslash-escaped using the +.Sm off +.Pf ` Cm \e Ar c No ', +.Pf ` Cm \e^ Ar c No ', +.Pf ` Cm \eM\- Ar c No ' +or +.Pf ` Cm \eM^ Ar c No ', +.Sm on +formats described above. +.It Cm c +The first character of +.Ar argument +is printed. +.It Cm s +Characters from the string +.Ar argument +are printed until the end is reached or until the number of characters +indicated by the precision specification is reached; if the +precision is omitted, all characters in the string are printed. +.It Cm \&% +Print a `%'; no argument is used. +.El +.Pp +In no case does a non-existent or small field width cause truncation of +a field; padding takes place only if the specified field width exceeds +the actual width. +.Sh EXIT STATUS +.Nm +exits 0 on success, 1 on failure. +.Sh SEE ALSO +.Xr echo 1 , +.Xr printf 3 , +.Xr vis 3 , +.Xr printf 9 +.Sh STANDARDS +The +.Nm +utility conforms to +.St -p1003.1-2001 . +.Pp +Support for the floating point formats and `*' as a field width and precision +are optional in POSIX. 
+.Pp +The behaviour of the %B format and the \e', \e", \exxx, \ee and +\e[M][\-|^]c escape sequences are undefined in POSIX. +.Sh BUGS +Since the floating point numbers are translated from +.Tn ASCII +to floating-point and +then back again, floating-point precision may be lost. +.Pp +Hexadecimal character constants are restricted to, and should be specified +as, two character constants. +This is contrary to the ISO C standard but +does guarantee detection of the end of the constant. +.Sh NOTES +All formats which treat the +.Ar argument +as a number first convert the +.Ar argument +from its external representation as a character string +to an internal numeric representation, and then apply the +format to the internal numeric representation, producing +another external character string representation. +One might expect the +.Cm \&%c +format to do likewise, but in fact it does not. +.Pp +To convert a string representation of a decimal, octal, or hexadecimal +number into the corresponding character, two nested +.Nm +invocations may be used, in which the inner invocation +converts the input to an octal string, and the outer +invocation uses the octal string as part of a format. +For example, the following command outputs the character whose code +is 0x0A, which is a newline in ASCII: +.Pp +.Dl printf \&"$(printf \&"\e\e%o" \&"0x0A")" diff --git a/commands/printf/printf.c b/commands/printf/printf.c new file mode 100644 index 000000000..386f4cdfc --- /dev/null +++ b/commands/printf/printf.c @@ -0,0 +1,693 @@ +/* $NetBSD: printf.c,v 1.33.8.1 2009/10/14 18:37:30 sborrill Exp $ */ + +/* + * Copyright (c) 1989, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +#ifndef lint +#if !defined(BUILTIN) && !defined(SHELL) +__COPYRIGHT("@(#) Copyright (c) 1989, 1993\ + The Regents of the University of California. 
All rights reserved.");
+#endif
+#endif
+
+#ifndef lint
+#if 0
+static char sccsid[] = "@(#)printf.c 8.2 (Berkeley) 3/22/95";
+#else
+__RCSID("$NetBSD: printf.c,v 1.33.8.1 2009/10/14 18:37:30 sborrill Exp $");
+#endif
+#endif /* not lint */
+
+#include <sys/types.h>
+
+#include <ctype.h>
+#include <err.h>
+#include <errno.h>
+#include <inttypes.h>
+#include <limits.h>
+#include <locale.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#ifdef __GNUC__
+#define ESCAPE '\e'
+#else
+#define ESCAPE 033
+#endif
+
+static void conv_escape_str(char *, void (*)(int));
+static char *conv_escape(char *, char *);
+static char *conv_expand(const char *);
+static int getchr(void);
+static double getdouble(void);
+static int getwidth(void);
+static intmax_t getintmax(void);
+static uintmax_t getuintmax(void);
+static char *getstr(void);
+static char *mklong(const char *, int);
+static void check_conversion(const char *, const char *);
+static void usage(void);
+
+static void b_count(int);
+static void b_output(int);
+static size_t b_length;
+static char *b_fmt;
+
+static int rval;
+static char **gargv;
+
+#ifdef BUILTIN /* csh builtin */
+#define main progprintf
+#endif
+
+#ifdef SHELL /* sh (aka ash) builtin */
+#define main printfcmd
+#include "../../bin/sh/bltin/bltin.h"
+#endif /* SHELL */
+
+/*
+ * Call printf(3) with format f and value func, passing fieldwidth and/or
+ * precision first when they were supplied through '*' (i.e. are not -1).
+ * The result is stored in the caller's variable "error".
+ */
+#define PF(f, func) { \
+	if (fieldwidth != -1) { \
+		if (precision != -1) \
+			error = printf(f, fieldwidth, precision, func); \
+		else \
+			error = printf(f, fieldwidth, func); \
+	} else if (precision != -1) \
+		error = printf(f, precision, func); \
+	else \
+		error = printf(f, func); \
+}
+
+/* Same as PF() but formats into a newly allocated string via asprintf(3). */
+#define APF(cpp, f, func) { \
+	if (fieldwidth != -1) { \
+		if (precision != -1) \
+			error = asprintf(cpp, f, fieldwidth, precision, func); \
+		else \
+			error = asprintf(cpp, f, fieldwidth, func); \
+	} else if (precision != -1) \
+		error = asprintf(cpp, f, precision, func); \
+	else \
+		error = asprintf(cpp, f, func); \
+}
+
+/*
+ * printf utility: the first operand is the format, the remaining operands
+ * are consumed through gargv by the get*() helpers.  Every conversion
+ * specification is cut out of the format in place (by temporarily
+ * NUL-terminating it) and handed to printf(3) with the converted operand.
+ * Bit 0x100 of rval is set by conv_escape_str() when a \c escape was
+ * seen, which stops all further output; it is masked out of the value
+ * returned.  Returns 1 on a usage error, a bad directive or an output
+ * failure.
+ */
+#ifdef main
+int main(int, char *[]);
+#endif
+int main(int argc, char *argv[])
+{
+	char *fmt, *start;
+	int fieldwidth, precision;
+	char nextch;
+	char *format;
+	int ch;
+	int error;
+
+#if !defined(SHELL) && !defined(BUILTIN)
+	(void)setlocale (LC_ALL, "");
+#endif
+
+	while ((ch = getopt(argc, argv, "")) != -1) {
+		switch (ch) {
+		case '?':
+		default:
+			usage();
+			return 1;
+		}
+	}
+	argc -= optind;
+	argv += optind;
+
+	if (argc < 1) {
+		usage();
+		return 1;
+	}
+
+	format = *argv;
+	gargv = ++argv;
+
+#define SKIP1 "#-+ 0"
+#define SKIP2 "0123456789"
+	do {
+		/*
+		 * Basic algorithm is to scan the format string for conversion
+		 * specifications -- once one is found, find out if the field
+		 * width or precision is a '*'; if it is, gather up value.
+		 * Note, format strings are reused as necessary to use up the
+		 * provided arguments, arguments of zero/null string are
+		 * provided to use up the format string.
+		 */
+
+		/* find next format specification */
+		for (fmt = format; (ch = *fmt++) != '\0';) {
+			/*
+			 * A backslash that ends the format is printed as
+			 * is; conv_escape() would otherwise consume the
+			 * terminating NUL and leave fmt past the string.
+			 */
+			if (ch == '\\' && *fmt != '\0') {
+				char c_ch;
+				fmt = conv_escape(fmt, &c_ch);
+				putchar(c_ch);
+				continue;
+			}
+			if (ch != '%' || (*fmt == '%' && ++fmt)) {
+				(void)putchar(ch);
+				continue;
+			}
+
+			/* Ok - we've found a format specification,
+			   Save its address for a later printf(). */
+			start = fmt - 1;
+
+			/* skip to field width */
+			fmt += strspn(fmt, SKIP1);
+			if (*fmt == '*') {
+				fmt++;
+				fieldwidth = getwidth();
+			} else
+				fieldwidth = -1;
+
+			/* skip to possible '.', get following precision */
+			fmt += strspn(fmt, SKIP2);
+			if (*fmt == '.') {
+				fmt++;
+				if (*fmt == '*') {
+					fmt++;
+					precision = getwidth();
+				} else
+					precision = -1;
+			} else
+				precision = -1;
+
+			fmt += strspn(fmt, SKIP2);
+
+			ch = *fmt;
+			if (!ch) {
+				warnx("missing format character");
+				return (1);
+			}
+			/* null terminate format string so we can use it
+			   as an argument to printf. */
+			nextch = fmt[1];
+			fmt[1] = 0;
+			switch (ch) {
+
+			case 'B': {
+				const char *p = conv_expand(getstr());
+				if (p == NULL)
+					goto out;
+				*fmt = 's';
+				PF(start, p);
+				if (error < 0)
+					goto out;
+				break;
+			}
+			case 'b': {
+				/* There has to be a better way to do this,
+				 * but the string we generate might have
+				 * embedded nulls. */
+				static char *a, *t;
+				char *cp = getstr();
+				/* Free on entry in case shell longjumped out */
+				if (a != NULL)
+					free(a);
+				a = NULL;
+				if (t != NULL)
+					free(t);
+				t = NULL;
+				/* Count number of bytes we want to output */
+				b_length = 0;
+				conv_escape_str(cp, b_count);
+				t = malloc(b_length + 1);
+				if (t == NULL)
+					goto out;
+				(void)memset(t, 'x', b_length);
+				t[b_length] = 0;
+				/* Get printf to calculate the lengths */
+				*fmt = 's';
+				APF(&a, start, t);
+				if (error == -1)
+					goto out;
+				b_fmt = a;
+				/* Output leading spaces and data bytes */
+				conv_escape_str(cp, b_output);
+				/* Add any trailing spaces */
+				printf("%s", b_fmt);
+				break;
+			}
+			case 'c': {
+				char p = getchr();
+				PF(start, p);
+				if (error < 0)
+					goto out;
+				break;
+			}
+			case 's': {
+				char *p = getstr();
+				PF(start, p);
+				if (error < 0)
+					goto out;
+				break;
+			}
+			case 'd':
+			case 'i': {
+				intmax_t p = getintmax();
+				char *f = mklong(start, ch);
+				PF(f, p);
+				if (error < 0)
+					goto out;
+				break;
+			}
+			case 'o':
+			case 'u':
+			case 'x':
+			case 'X': {
+				uintmax_t p = getuintmax();
+				char *f = mklong(start, ch);
+				PF(f, p);
+				if (error < 0)
+					goto out;
+				break;
+			}
+			case 'e':
+			case 'E':
+			case 'f':
+			case 'g':
+			case 'G': {
+				double p = getdouble();
+				PF(start, p);
+				if (error < 0)
+					goto out;
+				break;
+			}
+			default:
+				warnx("%s: invalid directive", start);
+				return 1;
+			}
+			/* undo the in-place edits made to the format */
+			*fmt++ = ch;
+			*fmt = nextch;
+			/* escape if a \c was encountered */
+			if (rval & 0x100)
+				return rval & ~0x100;
+		}
+	} while (gargv != argv && *gargv);
+
+	return rval & ~0x100;
+out:
+	warn("print failed");
+	return 1;
+}
+
+/* helper functions for conv_escape_str */
+
+static void
+/*ARGSUSED*/
+b_count(int ch) +{ + b_length++; +} + +/* Output one converted character for every 'x' in the 'format' */ + +static void +b_output(int ch) +{ + for (;;) { + switch (*b_fmt++) { + case 0: + b_fmt--; + return; + case ' ': + putchar(' '); + break; + default: + putchar(ch); + return; + } + } +} + + +/* + * Print SysV echo(1) style escape string + * Halts processing string if a \c escape is encountered. + */ +static void +conv_escape_str(char *str, void (*do_putchar)(int)) +{ + int value; + int ch; + char c; + + while ((ch = *str++) != '\0') { + if (ch != '\\') { + do_putchar(ch); + continue; + } + + ch = *str++; + if (ch == 'c') { + /* \c as in SYSV echo - abort all processing.... */ + rval |= 0x100; + break; + } + + /* + * %b string octal constants are not like those in C. + * They start with a \0, and are followed by 0, 1, 2, + * or 3 octal digits. + */ + if (ch == '0') { + int octnum = 0, i; + for (i = 0; i < 3; i++) { + if (!isdigit((unsigned char)*str) || *str > '7') + break; + octnum = (octnum << 3) | (*str++ - '0'); + } + do_putchar(octnum); + continue; + } + + /* \[M][^|-]C as defined by vis(3) */ + if (ch == 'M' && *str == '-') { + do_putchar(0200 | str[1]); + str += 2; + continue; + } + if (ch == 'M' && *str == '^') { + str++; + value = 0200; + ch = '^'; + } else + value = 0; + if (ch == '^') { + ch = *str++; + if (ch == '?') + value |= 0177; + else + value |= ch & 037; + do_putchar(value); + continue; + } + + /* Finally test for sequences valid in the format string */ + str = conv_escape(str - 1, &c); + do_putchar(c); + } +} + +/* + * Print "standard" escape characters + */ +static char * +conv_escape(char *str, char *conv_ch) +{ + int value; + int ch; + char num_buf[4], *num_end; + + ch = *str++; + + switch (ch) { + case '0': case '1': case '2': case '3': + case '4': case '5': case '6': case '7': + num_buf[0] = ch; + ch = str[0]; + num_buf[1] = ch; + num_buf[2] = ch ? 
str[1] : 0; + num_buf[3] = 0; + value = strtoul(num_buf, &num_end, 8); + str += num_end - (num_buf + 1); + break; + + case 'x': + /* Hexadecimal character constants are not required to be + supported (by SuS v1) because there is no consistent + way to detect the end of the constant. + Supporting 2 byte constants is a compromise. */ + ch = str[0]; + num_buf[0] = ch; + num_buf[1] = ch ? str[1] : 0; + num_buf[2] = 0; + value = strtoul(num_buf, &num_end, 16); + str += num_end - num_buf; + break; + + case '\\': value = '\\'; break; /* backslash */ + case '\'': value = '\''; break; /* single quote */ + case '"': value = '"'; break; /* double quote */ + case 'a': value = '\a'; break; /* alert */ + case 'b': value = '\b'; break; /* backspace */ + case 'e': value = ESCAPE; break; /* escape */ + case 'f': value = '\f'; break; /* form-feed */ + case 'n': value = '\n'; break; /* newline */ + case 'r': value = '\r'; break; /* carriage-return */ + case 't': value = '\t'; break; /* tab */ + case 'v': value = '\v'; break; /* vertical-tab */ + + default: + warnx("unknown escape sequence `\\%c'", ch); + rval = 1; + value = ch; + break; + } + + *conv_ch = value; + return str; +} + +/* expand a string so that everything is printable */ + +static char * +conv_expand(const char *str) +{ + static char *conv_str; + char *cp; + int ch; + + if (conv_str) + free(conv_str); + /* get a buffer that is definitely large enough.... 
*/ + conv_str = malloc(4 * strlen(str) + 1); + if (!conv_str) + return NULL; + cp = conv_str; + + while ((ch = *(const unsigned char *)str++) != '\0') { + switch (ch) { + /* Use C escapes for expected control characters */ + case '\\': ch = '\\'; break; /* backslash */ + case '\'': ch = '\''; break; /* single quote */ + case '"': ch = '"'; break; /* double quote */ + case '\a': ch = 'a'; break; /* alert */ + case '\b': ch = 'b'; break; /* backspace */ + case ESCAPE: ch = 'e'; break; /* escape */ + case '\f': ch = 'f'; break; /* form-feed */ + case '\n': ch = 'n'; break; /* newline */ + case '\r': ch = 'r'; break; /* carriage-return */ + case '\t': ch = 't'; break; /* tab */ + case '\v': ch = 'v'; break; /* vertical-tab */ + default: + /* Copy anything printable */ + if (isprint(ch)) { + *cp++ = ch; + continue; + } + /* Use vis(3) encodings for the rest */ + *cp++ = '\\'; + if (ch & 0200) { + *cp++ = 'M'; + ch &= ~0200; + } + if (ch == 0177) { + *cp++ = '^'; + *cp++ = '?'; + continue; + } + if (ch < 040) { + *cp++ = '^'; + *cp++ = ch | 0100; + continue; + } + *cp++ = '-'; + *cp++ = ch; + continue; + } + *cp++ = '\\'; + *cp++ = ch; + } + + *cp = 0; + return conv_str; +} + +static char * +mklong(const char *str, int ch) +{ + static char copy[64]; + size_t len; + + len = strlen(str) + 2; + if (len > sizeof copy) { + warnx("format %s too complex\n", str); + len = 4; + } + (void)memmove(copy, str, len - 3); + copy[len - 3] = 'j'; + copy[len - 2] = ch; + copy[len - 1] = '\0'; + return copy; +} + +static int +getchr(void) +{ + if (!*gargv) + return 0; + return (int)**gargv++; +} + +static char * +getstr(void) +{ + static char empty[] = ""; + if (!*gargv) + return empty; + return *gargv++; +} + +static int +getwidth(void) +{ + long val; + char *s, *ep; + + s = *gargv; + if (!*gargv) + return (0); + gargv++; + + errno = 0; + val = strtoul(s, &ep, 0); + check_conversion(s, ep); + + /* Arbitrarily 'restrict' field widths to 1Mbyte */ + if (val < 0 || val > 1 << 20) { + 
warnx("%s: invalid field width", s); + return 0; + } + + return val; +} + +static intmax_t +getintmax(void) +{ + intmax_t val; + char *cp, *ep; + + cp = *gargv; + if (cp == NULL) + return 0; + gargv++; + + if (*cp == '\"' || *cp == '\'') + return *(cp+1); + + errno = 0; + val = strtoimax(cp, &ep, 0); + check_conversion(cp, ep); + return val; +} + +static uintmax_t +getuintmax(void) +{ + uintmax_t val; + char *cp, *ep; + + cp = *gargv; + if (cp == NULL) + return 0; + gargv++; + + if (*cp == '\"' || *cp == '\'') + return *(cp + 1); + + /* strtoumax won't error -ve values */ + while (isspace(*(unsigned char *)cp)) + cp++; + if (*cp == '-') { + warnx("%s: expected positive numeric value", cp); + rval = 1; + return 0; + } + + errno = 0; + val = strtoumax(cp, &ep, 0); + check_conversion(cp, ep); + return val; +} + +static double +getdouble(void) +{ + double val; + char *ep; + + if (!*gargv) + return (0.0); + + if (**gargv == '\"' || **gargv == '\'') + return (double) *((*gargv++)+1); + + errno = 0; + val = strtod(*gargv, &ep); + check_conversion(*gargv++, ep); + return val; +} + +static void +check_conversion(const char *s, const char *ep) +{ + if (*ep) { + if (ep == s) + warnx("%s: expected numeric value", s); + else + warnx("%s: not completely converted", s); + rval = 1; + } else if (errno == ERANGE) { + warnx("%s: %s", s, strerror(ERANGE)); + rval = 1; + } +} + +static void +usage(void) +{ + (void)fprintf(stderr, "Usage: %s format [arg ...]\n", getprogname()); +} -- 2.44.0