From d5c6c6a51bc6be074cafe4527f7a4e63ee737e01 Mon Sep 17 00:00:00 2001 From: Lionel Sambuc Date: Sat, 24 Aug 2013 22:11:19 +0200 Subject: [PATCH] Importing usr.bin/tr Change-Id: I4563198f111f2ec3687f6a5084edd05f243c2263 --- commands/Makefile | 2 +- commands/tr/Makefile | 5 - man/man1/Makefile | 2 +- man/man1/tr.1 | 70 ----- releasetools/nbsd_ports | 1 + usr.bin/Makefile | 2 +- usr.bin/tr/Makefile | 11 + commands/tr/tr.h => usr.bin/tr/extern.h | 4 +- {commands => usr.bin}/tr/str.c | 239 ++++++++-------- usr.bin/tr/tr.1 | 350 ++++++++++++++++++++++++ {commands => usr.bin}/tr/tr.c | 39 +-- 11 files changed, 494 insertions(+), 231 deletions(-) delete mode 100644 commands/tr/Makefile delete mode 100644 man/man1/tr.1 create mode 100644 usr.bin/tr/Makefile rename commands/tr/tr.h => usr.bin/tr/extern.h (96%) rename {commands => usr.bin}/tr/str.c (58%) create mode 100644 usr.bin/tr/tr.1 rename {commands => usr.bin}/tr/tr.c (92%) diff --git a/commands/Makefile b/commands/Makefile index cfd3960b2..bfe16beb3 100644 --- a/commands/Makefile +++ b/commands/Makefile @@ -26,7 +26,7 @@ SUBDIR= add_route arp ash at backup btrace \ sleep slip spell split sprofalyze sprofdiff srccrc \ stty svclog svrctl swifi synctree sysenv \ syslogd tail tcpd tcpdp tcpstat tee telnet \ - telnetd term termcap tget time touch tr \ + telnetd term termcap tget time touch \ truncate tty udpstat umount uname \ unstack update uud uue version vol \ whereis which write writeisofs fetch \ diff --git a/commands/tr/Makefile b/commands/tr/Makefile deleted file mode 100644 index 517d67ef3..000000000 --- a/commands/tr/Makefile +++ /dev/null @@ -1,5 +0,0 @@ -PROG= tr -SRCS= tr.c str.c -MAN= - -.include diff --git a/man/man1/Makefile b/man/man1/Makefile index e109aa106..2e7f2b231 100644 --- a/man/man1/Makefile +++ b/man/man1/Makefile @@ -18,7 +18,7 @@ MAN= ash.1 at.1 \ shar.1 sleep.1 spell.1 \ split.1 stty.1 svc.1 svrctl.1 \ synctree.1 sysenv.1 sz.1 tail.1 tee.1 telnet.1 template.1 \ - term.1 termcap.1 tget.1 time.1 tr.1 true.1 \ + term.1 termcap.1 tget.1 time.1 true.1 \ truncate.1 tty.1 umount.1 uname.1 \ uud.1 uue.1 vol.1 whereis.1 which.1 \ write.1 yap.1 linkfarm.1 pkg_view.1 diff --git a/man/man1/tr.1 b/man/man1/tr.1 deleted file mode 100644 index 2b66de49b..000000000 --- a/man/man1/tr.1 +++ /dev/null @@ -1,70 +0,0 @@ -.TH TR 1 -.SH NAME -tr \- translate character codes -.SH SYNOPSIS -\fBtr\fR [\fB\-cds\fR]\fR [\fIstring1\fR] [\fIstring2\fR]\fR -.br -.de FL -.TP -\\fB\\$1\\fR -\\$2 -.. -.de EX -.TP 20 -\\fB\\$1\\fR -# \\$2 -.. -.SH OPTIONS -.TP 5 -.B \-c -# Complement the set of characters in \fIstring1\fR -.TP 5 -.B \-d -# Delete all characters specified in \fIstring1\fR -.TP 5 -.B \-s -# Squeeze all runs of characters in \fIstring1\fR to one character -.SH EXAMPLES -.TP 20 -.B tr \(fmA\-Z\(fm \(fma\-z\(fm y -# Convert upper case to lower case -.TP 20 -.B tr \-d \(fm0123456789\(fm f2 -# Delete all digits from \fIf1\fR -.SH DESCRIPTION -.PP -.I Tr -performs simple character translation. -When no flag is specified, each character in -.I string1 -is mapped onto the corresponding character in -.I string2 . -.PP -There are two types of -.I tr -out there, one that requires [ and ] for character classes, and one that does -not. Here is what the example above would look like for a -.I tr -that needs the brackets: -.PP -.RS -.B "tr \(fm[A\-Z]\(fm \(fm[a\-z]\(fm y" -.RE -.PP -Use [ and ] if you want to be portable, because a -.I tr -that doesn't need them will still accept the syntax and mindlessly -translate [ into [ and ] into ]. -.PP -MINIX tr supports the following character classes: alnum, alpha, digit, lower, -upper and xdigit. If any of these keywords is encountered between backets and -colons, it is replaced by respectively alphanumeric characters, alphabetic -characters, decimal digits, lowercase letters, uppercase letters and -hexadecimal digits. The following are equivalent with the given examples: -.TP 20 -.B tr \(fm[:upper:]\(fm \(fm[:lower:]\(fm y -# Convert upper case to lower case -.TP 20 -.B tr \-d \(fm[:digit:]\(fm f2 -# Delete all digits from \fIf1\fR - diff --git a/releasetools/nbsd_ports b/releasetools/nbsd_ports index f9bc22df6..5de902ff8 100644 --- a/releasetools/nbsd_ports +++ b/releasetools/nbsd_ports @@ -189,6 +189,7 @@ 2012/02/10 16:16:12,usr.bin/su 2012/06/01 12:08:40,usr.bin/tic 2012/10/17 12:00:00,usr.bin/tput +2012/10/17 12:00:00,usr.bin/tr 2012/10/17 12:00:00,usr.bin/tsort 2010/10/06 07:59:18,usr.bin/uniq 2012/10/17 12:00:00,usr.bin/wc diff --git a/usr.bin/Makefile b/usr.bin/Makefile index 5b7e0dad3..63b52eaa6 100644 --- a/usr.bin/Makefile +++ b/usr.bin/Makefile @@ -26,7 +26,7 @@ SUBDIR= \ shuffle sed seq \ sort stat su \ tic tput \ - tsort unexpand \ + tr tsort unexpand \ toproto \ uniq \ \ diff --git a/usr.bin/tr/Makefile b/usr.bin/tr/Makefile new file mode 100644 index 000000000..073124e55 --- /dev/null +++ b/usr.bin/tr/Makefile @@ -0,0 +1,11 @@ +# $NetBSD: Makefile,v 1.8 2012/08/10 12:10:28 joerg Exp $ +# @(#)Makefile 8.1 (Berkeley) 6/6/93 + +PROG= tr +SRCS= str.c tr.c + +.for f in str tr +COPTS.${f}.c+= -Wno-pointer-sign +.endfor + +.include diff --git a/commands/tr/tr.h b/usr.bin/tr/extern.h similarity index 96% rename from commands/tr/tr.h rename to usr.bin/tr/extern.h index 1c5ef3438..8e933330c 100644 --- a/commands/tr/tr.h +++ b/usr.bin/tr/extern.h @@ -1,4 +1,4 @@ -/* $NetBSD: extern.h,v 1.6 2003/08/07 11:16:46 agc Exp $ */ +/* $NetBSD: extern.h,v 1.7 2011/09/06 18:33:46 joerg Exp $ */ /*- * Copyright (c) 1991, 1993 @@ -45,4 +45,4 @@ typedef struct { #define NCHARS (UCHAR_MAX + 1) /* Number of possible characters. */ #define OOBCH (UCHAR_MAX + 1) /* Out of band character value. */ -int next (STR *); +int next(STR *); diff --git a/commands/tr/str.c b/usr.bin/tr/str.c similarity index 58% rename from commands/tr/str.c rename to usr.bin/tr/str.c index 88dcebbab..c853b318c 100644 --- a/commands/tr/str.c +++ b/usr.bin/tr/str.c @@ -1,4 +1,4 @@ -/* $NetBSD: str.c,v 1.12 2009/04/13 23:50:49 lukem Exp $ */ +/* $NetBSD: str.c,v 1.19 2011/09/08 12:00:26 christos Exp $ */ /*- * Copyright (c) 1991, 1993 @@ -29,6 +29,14 @@ * SUCH DAMAGE. */ +#include +#ifndef lint +#if 0 +static char sccsid[] = "@(#)str.c 8.2 (Berkeley) 4/28/95"; +#endif +__RCSID("$NetBSD: str.c,v 1.19 2011/09/08 12:00:26 christos Exp $"); +#endif /* not lint */ + #include #include @@ -39,38 +47,37 @@ #include #include -#include "tr.h" +#include "extern.h" -static int backslash (STR *); -static int bracket (STR *); -static int c_class (const void *, const void *); -static void genclass (STR *); -static void genequiv (STR *); -static int genrange (STR *); -static void genseq (STR *); +static int backslash(STR *); +static int bracket(STR *); +static int c_class(const void *, const void *); +static void genclass(STR *); +static void genequiv(STR *); +static int genrange(STR *); +static void genseq(STR *); int -next(s) - STR *s; +next(STR *s) { int ch; switch (s->state) { case EOS: - return (0); + return 0; case INFINITE: - return (1); + return 1; case NORMAL: switch (ch = *s->str) { case '\0': s->state = EOS; - return (0); + return 0; case '\\': s->lastch = backslash(s); break; case '[': if (bracket(s)) - return (next(s)); + return next(s); /* FALLTHROUGH */ default: ++s->str; @@ -80,121 +87,114 @@ next(s) /* We can start a range at any time. */ if (s->str[0] == '-' && genrange(s)) - return (next(s)); - return (1); + return next(s); + return 1; case RANGE: if (s->cnt-- == 0) { s->state = NORMAL; - return (next(s)); + return next(s); } ++s->lastch; - return (1); + return 1; case SEQUENCE: if (s->cnt-- == 0) { s->state = NORMAL; - return (next(s)); + return next(s); } - return (1); + return 1; case SET: if ((s->lastch = s->set[s->cnt++]) == OOBCH) { s->state = NORMAL; - return (next(s)); + return next(s); } - return (1); + return 1; } /* NOTREACHED */ - return (0); + return 0; } static int -bracket(s) - STR *s; +bracket(STR *s) { char *p; switch (s->str[1]) { case ':': /* "[:class:]" */ - if ((p = strstr((char *) s->str + 2, ":]")) == NULL) - return (0); + if ((p = strstr(s->str + 2, ":]")) == NULL) + return 0; *p = '\0'; s->str += 2; genclass(s); - s->str = (unsigned char *) p + 2; - return (1); + s->str = p + 2; + return 1; case '=': /* "[=equiv=]" */ - if ((p = strstr((char *) s->str + 2, "=]")) == NULL) - return (0); + if ((p = strstr(s->str + 2, "=]")) == NULL) + return 0; s->str += 2; genequiv(s); - return (1); + return 1; default: /* "[\###*n]" or "[#*n]" */ - if ((p = strpbrk((char *) s->str + 2, "*]")) == NULL) - return (0); + if ((p = strpbrk(s->str + 2, "*]")) == NULL) + return 0; if (p[0] != '*' || strchr(p, ']') == NULL) - return (0); + return 0; s->str += 1; genseq(s); - return (1); + return 1; } /* NOTREACHED */ } typedef struct { const char *name; - int (*func) (int); - int *set; + int (*func)(int); } CLASS; -static CLASS classes[] = { - { "alnum", isalnum, NULL, }, - { "alpha", isalpha, NULL, }, - { "blank", isblank, NULL, }, - { "cntrl", iscntrl, NULL, }, - { "digit", isdigit, NULL, }, - { "graph", isgraph, NULL, }, - { "lower", islower, NULL, }, - { "print", isprint, NULL, }, - { "punct", ispunct, NULL, }, - { "space", isspace, NULL, }, - { "upper", isupper, NULL, }, - { "xdigit", isxdigit, NULL, }, +static const CLASS classes[] = { + { "alnum", isalnum }, + { "alpha", isalpha }, + { "blank", isblank }, + { "cntrl", iscntrl }, + { "digit", isdigit }, + { "graph", isgraph }, + { "lower", islower }, + { "print", isprint }, + { "punct", ispunct }, + { "space", isspace }, + { "upper", isupper }, + { "xdigit", isxdigit }, }; static void -genclass(s) - STR *s; +genclass(STR *s) { - int cnt, (*func) (int); - CLASS *cp, tmp; + int cnt; + const CLASS *cp; + CLASS tmp; int *p; - tmp.name = (char *) s->str; - if ((cp = (CLASS *)bsearch(&tmp, classes, sizeof(classes) / - sizeof(CLASS), sizeof(CLASS), c_class)) == NULL) { - fprintf(stderr, "tr: unknown class %s\n", s->str); - exit(1); - } + tmp.name = s->str; + if ((cp = bsearch(&tmp, classes, sizeof(classes) / + sizeof(*cp), sizeof(*cp), c_class)) == NULL) + errx(1, "unknown class %s", s->str); - if ((cp->set = p = malloc((NCHARS + 1) * sizeof(int))) == NULL) { - perror("malloc"); - exit(1); - } - memset(p, 0, (NCHARS + 1) * sizeof(int)); - for (cnt = 0, func = cp->func; cnt < NCHARS; ++cnt) - if ((func)(cnt)) + if ((s->set = p = malloc((NCHARS + 1) * sizeof(*p))) == NULL) + err(1, "malloc"); + + for (cnt = 0; cnt < NCHARS; ++cnt) + if ((*cp->func)(cnt)) *p++ = cnt; - *p = OOBCH; + *p++ = OOBCH; + memset(p, 0, NCHARS + 1 - (p - s->set)); s->cnt = 0; s->state = SET; - s->set = cp->set; } static int -c_class(a, b) - const void *a, *b; +c_class(const void *a, const void *b) { - return (strcmp(((const CLASS *)a)->name, ((const CLASS *)b)->name)); + return strcmp(((const CLASS *)a)->name, ((const CLASS *)b)->name); } /* @@ -202,21 +202,16 @@ c_class(a, b) * we just syntax check and grab the character. */ static void -genequiv(s) - STR *s; +genequiv(STR *s) { if (*s->str == '\\') { s->equiv[0] = backslash(s); - if (*s->str != '=') { - fprintf(stderr, "tr: misplaced equivalence equals sign\n"); - exit(1); - } + if (*s->str != '=') + errx(1, "misplaced equivalence equals sign"); } else { s->equiv[0] = s->str[0]; - if (s->str[1] != '=') { - fprintf(stderr, "tr: misplaced equivalence equals sign\n"); - exit(1); - } + if (s->str[1] != '=') + errx(1, "misplaced equivalence equals sign"); } s->str += 2; s->cnt = 0; @@ -225,43 +220,37 @@ genequiv(s) } static int -genrange(s) - STR *s; +genrange(STR *s) { int stopval; - unsigned char *savestart; + char *savestart; savestart = s->str; stopval = *++s->str == '\\' ? backslash(s) : *s->str++; if (stopval < (u_char)s->lastch) { s->str = savestart; - return (0); + return 0; } s->cnt = stopval - s->lastch + 1; s->state = RANGE; --s->lastch; - return (1); + return 1; } static void -genseq(s) - STR *s; +genseq(STR *s) { char *ep; - if (s->which == STRING1) { - fprintf(stderr, "tr: sequences only valid in string2\n"); - exit(1); - } + if (s->which == STRING1) + errx(1, "sequences only valid in string2"); if (*s->str == '\\') s->lastch = backslash(s); else s->lastch = *s->str++; - if (*s->str != '*') { - fprintf(stderr, "tr: misplaced sequence asterisk\n"); - exit(1); - } + if (*s->str != '*') + errx(1, "misplaced sequence asterisk"); switch (*++s->str) { case '\\': @@ -273,14 +262,13 @@ genseq(s) break; default: if (isdigit(*s->str)) { - s->cnt = strtol((char *) s->str, &ep, 0); + s->cnt = strtol(s->str, &ep, 0); if (*ep == ']') { - s->str = (unsigned char *) ep + 1; + s->str = ep + 1; break; } } - fprintf(stderr, "tr: illegal sequence count\n"); - exit(1); + errx(1, "illegal sequence count"); /* NOTREACHED */ } @@ -292,8 +280,7 @@ genseq(s) * an escape code or a literal character. */ static int -backslash(s) - STR *s; +backslash(STR *s) { int ch, cnt, val; @@ -308,28 +295,30 @@ backslash(s) } } if (cnt) - return (val); + return val; if (ch != '\0') ++s->str; switch (ch) { - case 'a': /* escape characters */ - return ('\7'); - case 'b': - return ('\b'); - case 'f': - return ('\f'); - case 'n': - return ('\n'); - case 'r': - return ('\r'); - case 't': - return ('\t'); - case 'v': - return ('\13'); - case '\0': /* \" -> \ */ - s->state = EOS; - return ('\\'); - default: /* \x" -> x */ - return (ch); + case 'a': /* escape characters */ + return '\7'; + case 'b': + return '\b'; + case 'e': + return '\033'; + case 'f': + return '\f'; + case 'n': + return '\n'; + case 'r': + return '\r'; + case 't': + return '\t'; + case 'v': + return '\13'; + case '\0': /* \" -> \ */ + s->state = EOS; + return '\\'; + default: /* \x" -> x */ + return ch; } } diff --git a/usr.bin/tr/tr.1 b/usr.bin/tr/tr.1 new file mode 100644 index 000000000..28f58699a --- /dev/null +++ b/usr.bin/tr/tr.1 @@ -0,0 +1,350 @@ +.\" $NetBSD: tr.1,v 1.18 2009/11/12 00:43:53 joerg Exp $ +.\" +.\" Copyright (c) 1991, 1993 +.\" The Regents of the University of California. All rights reserved. +.\" +.\" This code is derived from software contributed to Berkeley by +.\" the Institute of Electrical and Electronics Engineers, Inc. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" 3. Neither the name of the University nor the names of its contributors +.\" may be used to endorse or promote products derived from this software +.\" without specific prior written permission. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. +.\" +.\" @(#)tr.1 8.1 (Berkeley) 6/6/93 +.\" +.Dd March 23, 2004 +.Dt TR 1 +.Os +.Sh NAME +.Nm tr +.Nd translate characters +.Sh SYNOPSIS +.Nm +.Op Fl cs +.Ar string1 string2 +.Nm +.Op Fl c +.Fl d +.Ar string1 +.Nm +.Op Fl c +.Fl s +.Ar string1 +.Nm +.Op Fl c +.Fl ds +.Ar string1 string2 +.Sh DESCRIPTION +The +.Nm +utility copies the standard input to the standard output with substitution +or deletion of selected characters. +.Pp +The following options are available: +.Bl -tag -width Ds +.It Fl c +Complements the set of characters in +.Ar string1 , +that is +.Fl c Ar \&ab +includes every character except for +.Sq a +and +.Sq b . +.It Fl d +The +.Fl d +option causes characters to be deleted from the input. +.It Fl s +The +.Fl s +option squeezes multiple occurrences of the characters listed in the last +operand (either +.Ar string1 +or +.Ar string2 ) +in the input into a single instance of the character. +This occurs after all deletion and translation is completed. +.El +.Pp +In the first synopsis form, the characters in +.Ar string1 +are translated into the characters in +.Ar string2 +where the first character in +.Ar string1 +is translated into the first character in +.Ar string2 +and so on. +If +.Ar string1 +is longer than +.Ar string2 , +the last character found in +.Ar string2 +is duplicated until +.Ar string1 +is exhausted. +.Pp +In the second synopsis form, the characters in +.Ar string1 +are deleted from the input. +.Pp +In the third synopsis form, the characters in +.Ar string1 +are compressed as described for the +.Fl s +option. +.Pp +In the fourth synopsis form, the characters in +.Ar string1 +are deleted from the input, and the characters in +.Ar string2 +are compressed as described for the +.Fl s +option. +.Pp +The following conventions can be used in +.Ar string1 +and +.Ar string2 +to specify sets of characters: +.Bl -tag -width [:equiv:] +.It character +Any character not described by one of the following conventions +represents itself. +.It \eoctal +A backslash followed by 1, 2 or 3 octal digits represents a character +with that encoded value. +To follow an octal sequence with a digit as a character, left zero-pad +the octal sequence to the full 3 octal digits. +.It \echaracter +A backslash followed by certain special characters maps to special +values. +.sp +.Bl -column cc +.It \ea \*[Lt]alert character\*[Gt] +.It \eb \*[Lt]backspace\*[Gt] +.It \ef \*[Lt]form-feed\*[Gt] +.It \en \*[Lt]newline\*[Gt] +.It \er \*[Lt]carriage return\*[Gt] +.It \et \*[Lt]tab\*[Gt] +.It \ev \*[Lt]vertical tab\*[Gt] +.El +.sp +A backslash followed by any other character maps to that character. +.It c-c +Represents the range of characters between the range endpoints, inclusively. +.It [:class:] +Represents all characters belonging to the defined character class. +Class names are: +.sp +.Bl -column xdigit +.It alnum \*[Lt]alphanumeric characters\*[Gt] +.It alpha \*[Lt]alphabetic characters\*[Gt] +.It blank \*[Lt]blank characters\*[Gt] +.It cntrl \*[Lt]control characters\*[Gt] +.It digit \*[Lt]numeric characters\*[Gt] +.It graph \*[Lt]graphic characters\*[Gt] +.It lower \*[Lt]lower-case alphabetic characters\*[Gt] +.It print \*[Lt]printable characters\*[Gt] +.It punct \*[Lt]punctuation characters\*[Gt] +.It space \*[Lt]space characters\*[Gt] +.It upper \*[Lt]upper-case characters\*[Gt] +.It xdigit \*[Lt]hexadecimal characters\*[Gt] +.El +.Pp +.\" All classes may be used in +.\" .Ar string1 , +.\" and in +.\" .Ar string2 +.\" when both the +.\" .Fl d +.\" and +.\" .Fl s +.\" options are specified. +.\" Otherwise, only the classes ``upper'' and ``lower'' may be used in +.\" .Ar string2 +.\" and then only when the corresponding class (``upper'' for ``lower'' +.\" and vice-versa) is specified in the same relative position in +.\" .Ar string1 . +.\" .Pp +With the exception of the +.Dq upper +and +.Dq lower +classes, characters in the classes are in unspecified order. +In the +.Dq upper +and +.Dq lower +classes, characters are entered in ascending order. +.Pp +For specific information as to which ASCII characters are included +in these classes, see +.Xr ctype 3 +and related manual pages. +.It [=equiv=] +Represents all characters or collating (sorting) elements belonging to +the same equivalence class as +.Ar equiv . +If there is a secondary ordering within the equivalence class, the +characters are ordered in ascending sequence. +Otherwise, they are ordered after their encoded values. +An example of an equivalence class might be +.Dq \&c +and +.Dq \&ch +in Spanish; +English has no equivalence classes. +.It [#*n] +Represents +.Ar n +repeated occurrences of the character represented by +.Ar # . +This +expression is only valid when it occurs in +.Ar string2 . +If +.Ar n +is omitted or is zero, it is interpreted as large enough to extend +.Ar string2 +sequence to the length of +.Ar string1 . +If +.Ar n +has a leading zero, it is interpreted as an octal value, otherwise, +it's interpreted as a decimal value. +.El +.Sh EXIT STATUS +.Nm +exits 0 on success, and \*[Gt]0 if an error occurs. +.Sh EXAMPLES +The following examples are shown as given to the shell: +.Pp +Create a list of the words in +.Ar file1 , +one per line, where a word is taken to be a maximal string of letters: +.sp +.D1 Li "tr -cs \*q[:alpha:]\*q \*q\en\*q \*[Lt] file1" +.sp +Translate the contents of +.Ar file1 +to upper-case: +.sp +.D1 Li "tr \*q[:lower:]\*q \*q[:upper:]\*q \*[Lt] file1" +.sp +Strip out non-printable characters from +.Ar file1 : +.sp +.D1 Li "tr -cd \*q[:print:]\*q \*[Lt] file1" +.Sh COMPATIBILITY +.At V +has historically implemented character ranges using the syntax +.Dq [c-c] +instead of the +.Dq c-c +used by historic +.Bx +implementations and standardized by POSIX. +.At V +shell scripts should work under this implementation as long as +the range is intended to map in another range, i.e. the command +.Pp +.Ic "tr [a-z] [A-Z]" +.Pp +will work as it will map the +.Sq \&[ +character in +.Ar string1 +to the +.Sq \&[ +character in +.Ar string2 . +However, if the shell script is deleting or squeezing characters as in +the command +.Pp +.Ic "tr -d [a-z]" +.Pp +the characters +.Sq \&[ +and +.Sq \&] +will be included in the deletion or compression list which would +not have happened under an historic +.At V +implementation. +Additionally, any scripts that depended on the sequence +.Dq a-z +to represent the three characters +.Sq \&a , +.Sq \&- , +and +.Sq \&z +will have to be rewritten as +.Dq a\e-z . +.Pp +The +.Nm +utility has historically not permitted the manipulation of NUL bytes in +its input and, additionally, stripped NUL's from its input stream. +This implementation has removed this behavior as a bug. +.Pp +The +.Nm +utility has historically been extremely forgiving of syntax errors, +for example, the +.Fl c +and +.Fl s +options were ignored unless two strings were specified. +This implementation will not permit illegal syntax. +.Sh STANDARDS +The +.Nm +utility is expected to be +.St -p1003.2 +compatible. +It should be noted that the feature wherein the last character of +.Ar string2 +is duplicated if +.Ar string2 +has less characters than +.Ar string1 +is permitted by POSIX but is not required. +Shell scripts attempting to be portable to other POSIX systems should use +the +.Dq [#*] +convention instead of relying on this behavior. +.Sh BUGS +.Nm +was originally designed to work with +.Tn US-ASCII . +Its use with character sets that do not share all the properties of +.Tn US-ASCII , +e.g., a symmetric set of upper and lower case characters +that can be algorithmically converted one to the other, +may yield unpredictable results. +.Pp +.Nm +should be internationalized. diff --git a/commands/tr/tr.c b/usr.bin/tr/tr.c similarity index 92% rename from commands/tr/tr.c rename to usr.bin/tr/tr.c index 0a54cc062..89b4b351f 100644 --- a/commands/tr/tr.c +++ b/usr.bin/tr/tr.c @@ -1,4 +1,4 @@ -/* $NetBSD: tr.c,v 1.8 2008/07/21 14:19:27 lukem Exp $ */ +/* $NetBSD: tr.c,v 1.9 2011/09/06 18:33:46 joerg Exp $ */ /* * Copyright (c) 1988, 1993 @@ -30,21 +30,17 @@ */ #include -#if 0 #ifndef lint __COPYRIGHT("@(#) Copyright (c) 1988, 1993\ The Regents of the University of California. All rights reserved."); #endif /* not lint */ -#endif -#if 0 #ifndef lint #if 0 static char sccsid[] = "@(#)tr.c 8.2 (Berkeley) 5/4/95"; #endif -__RCSID("$NetBSD: tr.c,v 1.8 2008/07/21 14:19:27 lukem Exp $"); +__RCSID("$NetBSD: tr.c,v 1.9 2011/09/06 18:33:46 joerg Exp $"); #endif /* not lint */ -#endif #include @@ -54,7 +50,7 @@ __RCSID("$NetBSD: tr.c,v 1.8 2008/07/21 14:19:27 lukem Exp $"); #include #include -#include "tr.h" +#include "extern.h" static int string1[NCHARS] = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, /* ASCII */ @@ -94,14 +90,11 @@ static int string1[NCHARS] = { STR s1 = { STRING1, NORMAL, 0, OOBCH, { 0, OOBCH }, NULL, NULL }; STR s2 = { STRING2, NORMAL, 0, OOBCH, { 0, OOBCH }, NULL, NULL }; -int main (int, char **); -static void setup (int *, char *, STR *, int); -static void usage (void); +static void setup(int *, char *, STR *, int); +__dead static void usage(void); int -main(argc, argv) - int argc; - char **argv; +main(int argc, char **argv) { int ch, cnt, lastch, *p; int cflag, dflag, sflag, isstring2; @@ -198,17 +191,15 @@ main(argc, argv) if (!isstring2) usage(); - s1.str = (unsigned char *) argv[0]; - s2.str = (unsigned char *) argv[1]; + s1.str = argv[0]; + s2.str = argv[1]; if (cflag) for (cnt = NCHARS, p = string1; cnt--;) *p++ = OOBCH; - if (!next(&s2)) { - fprintf(stderr, "empty string2\n"); - exit(1); - } + if (!next(&s2)) + errx(1, "empty string2"); /* If string2 runs out of characters, use the last one specified. */ if (sflag) @@ -242,15 +233,11 @@ main(argc, argv) } static void -setup(string, arg, str, cflag) - int *string; - char *arg; - STR *str; - int cflag; +setup(int *string, char *arg, STR *str, int cflag) { int cnt, *p; - str->str = (unsigned char *) arg; + str->str = arg; memset(string, 0, NCHARS * sizeof(int)); while (next(str)) string[str->lastch] = 1; @@ -260,7 +247,7 @@ setup(string, arg, str, cflag) } static void -usage() +usage(void) { (void)fprintf(stderr, "usage: tr [-cs] string1 string2\n"); (void)fprintf(stderr, " tr [-c] -d string1\n"); -- 2.44.0