]> Zhao Yanbai Git Server - minix.git/commitdiff
New sed
authorBen Gras <ben@minix3.org>
Thu, 2 Nov 2006 16:48:15 +0000 (16:48 +0000)
committerBen Gras <ben@minix3.org>
Thu, 2 Nov 2006 16:48:15 +0000 (16:48 +0000)
commands/sed/BUGS [new file with mode: 0644]
commands/sed/Makefile [new file with mode: 0644]
commands/sed/README [new file with mode: 0644]
commands/sed/sed.h [new file with mode: 0644]
commands/sed/sedcomp.c [new file with mode: 0644]
commands/sed/sedexec.c [new file with mode: 0644]

diff --git a/commands/sed/BUGS b/commands/sed/BUGS
new file mode 100644 (file)
index 0000000..b1e19bc
--- /dev/null
@@ -0,0 +1,10 @@
+
+So far no regressions over the historic sed are known. If you find a bug,
+please provide a test-case (.sed, .in and .out, look into tests/) - if
+possible try to debug the problem and propose a patch.
+
+We will focus on POSIX conformance and small size - GNU sed extensions are
+most likely not accepted.
+
+Please report issues to: Rene Rebe <rene@exactcode.de>
+
diff --git a/commands/sed/Makefile b/commands/sed/Makefile
new file mode 100644 (file)
index 0000000..111c278
--- /dev/null
@@ -0,0 +1,22 @@
+# Makefile for minised
+
+# If your compiler does not support these flags, just remove them.
+# They only ensure that no new warning regressions make it into the source.
+CFLAGS = -Wall -Wwrite-strings
+
+minised: sedcomp.o sedexec.o
+       $(CC) $(LFLAGS) sedcomp.o sedexec.o -o minised
+
+sedcomp.o: sedcomp.c sed.h
+sedexec.o: sedexec.c sed.h
+
+install: minised
+       install -o bin -m 755 minised /usr/bin/
+       install -o bin -m 755 minised /bin/
+
+clean:
+       rm -f minised sedcomp.o sedexec.o
+
+check: minised
+       cd tests; ./run ../minised
+
diff --git a/commands/sed/README b/commands/sed/README
new file mode 100644 (file)
index 0000000..b832d53
--- /dev/null
@@ -0,0 +1,81 @@
+                               small-sed
+               by Eric S. Raymond, <esr@snark.thyrsus.com>
+                    and Rene Rebe <rene@exactcode.de>
+
+This is a smaller, cheaper, and faster SED utility. Minix uses it. GNU used
+to use it, until they built their own sed around an extended (some would
+say over-extended) regexp package. It is also used for embedded tasks (for
+example by the T2 SDE - http://www.t2-project.org).
+
+The original sed 1.0 was written in three pieces; sed.h, sedcomp.c, sedexec.c.
+Some Minix hacker ran them together into a single-file version, mnsed.c which
+is not supported and shipped these days; if changes are needed for Minix please
+send a patch to the normal source.
+
+The 1.2 version (9 Oct 1996) added mnsed's support for detecting
+truncated hold spaces. The mnsed version is missing one feature
+of the 1.2 version: support for +. Also, the multiple-file I/O is
+organized slightly differently.
+
+The 1.3 version added a bug fix by Tom Oehser, and the `L' command.  Also
+this program is now distributed under GPL.
+
+The 1.5 version incorporated a lot of bug fixes by Rene Rebe as well as
+a real test suite. Also the function declaration and definition have been
+converted from the K&R C to ANSI C.
+
+The 1.6 version includes support for the n'th match for the substitute command
+as well as support for predefined character classes and only writes lines
+with newline if one was present in the input line (compatible with GNU sed).
+
+The 1.7 version fixed a segmentation fault with empty regular expressions,
+not to leak other buffer content for groups of commands and escaping
+numerical separators in regular expressions by disabling obscure code.
+Additionally compilation with older compilers as well as warnings with the
+latest gcc versions have been corrected.
+
+The 1.8 version fixes matching of some escaped characters (a regression
+introduced with \+ star matching), \+ star matching to correctly copy
+and mark the internal bytecode representation, back references inside lhs
+regular expressions matching (to work at all) and marking the correct
+regular expression for star matches.
+
+The 1.9 version included a microoptimization shaving some bytes off the
+binary and some cpu cycles at run time, reusing the previous regular
+expressions for empty ones, predefined character classes with control
+characters, handling of escaped ampersands and support for backreference
+\0 and Kleene star operator on groups.
+
+The 1.10 version fixed a special case of grouped star matching where
+\+1..n overwrote the last match, not to infinite loop on certain zero match
+grouped star cases and not to crash on w(rite to file). The version also
+no longer falls into the conservative end-of-file matching mode when just
+end-of-line matching was used.
+
+The 1.11 version again fixed w(rite to file) handling to correctly honor
+/dev/stdout and /dev/stderr as GNU sed does and thus keep the streams in
+sync. Some unused variables have been removed and two diagnostics
+fixed to be printed correctly.
+
+The 1.12 version fixed the l(ist) command to actually work, some tiny
+optimizations have been performed as well as some more compiler warnings
+fixed.
+
+Makefile       -- how to build sed
+sed.h          -- declarations and structures
+sedcomp.c      -- sed pattern compilation
+sedexec.c      -- sed program execution
+sed.1          -- source for the man page
+tests/         -- a small set of sed tests
+
+Some releases also ship the man page in man format.
+
+Surf to
+
+   http://www.exactcode.de/oss/minised/
+   http://www.catb.org/~esr/
+
+for updates of this software. There is a sed FAQ kept at these
+locations:
+
+   http://www.dreamwvr.com/sed-info/sed-faq.html
diff --git a/commands/sed/sed.h b/commands/sed/sed.h
new file mode 100644 (file)
index 0000000..13db920
--- /dev/null
@@ -0,0 +1,85 @@
+/* sed.h -- types and constants for the stream editor
+   Copyright (C) 1995-2003 Eric S. Raymond
+   Copyright (C) 2004-2005 Rene Rebe
+*/
+
+#define TRUE            1
+#define FALSE           0
+
+/* data area sizes used by both modules */
+#define MAXBUF         4000    /* current line buffer size */
+#define MAXAPPENDS     20      /* maximum number of appends */
+#define MAXTAGS                9       /* tagged patterns are \1 to \9 */
+#define MAXCMDS                200     /* maximum number of compiled commands */
+#define MAXLINES       256     /* max # numeric addresses to compile */ 
+
+/* constants for compiled-command representation */
+#define EQCMD  0x01    /* = -- print current line number               */
+#define ACMD   0x02    /* a -- append text after current line  */
+#define BCMD   0x03    /* b -- branch to label                         */
+#define CCMD   0x04    /* c -- change current line             */
+#define DCMD   0x05    /* d -- delete all of pattern space             */
+#define CDCMD  0x06    /* D -- delete first line of pattern space      */
+#define GCMD   0x07    /* g -- copy hold space to pattern space        */
+#define CGCMD  0x08    /* G -- append hold space to pattern space      */
+#define HCMD   0x09    /* h -- copy pattern space to hold space        */
+#define CHCMD  0x0A    /* H -- append pattern space to hold space      */
+#define ICMD   0x0B    /* i -- insert text before current line         */
+#define LCMD   0x0C    /* l -- print pattern space in escaped form     */
+#define CLCMD   0x20   /* L -- hexdump                                 */
+#define NCMD   0x0D    /* n -- get next line into pattern space        */
+#define CNCMD  0x0E    /* N -- append next line to pattern space       */
+#define PCMD   0x0F    /* p -- print pattern space to output           */
+#define CPCMD  0x10    /* P -- print first line of pattern space       */
+#define QCMD   0x11    /* q -- exit the stream editor                  */
+#define RCMD   0x12    /* r -- read in a file after current line */
+#define SCMD   0x13    /* s -- regular-expression substitute           */
+#define TCMD   0x14    /* t -- branch on last substitute successful    */
+#define CTCMD  0x15    /* T -- branch on last substitute failed        */
+#define WCMD   0x16    /* w -- write pattern space to file             */
+#define CWCMD  0x17    /* W -- write first line of pattern space       */
+#define XCMD   0x18    /* x -- exchange pattern and hold spaces        */
+#define YCMD   0x19    /* y -- transliterate text                      */
+
+typedef struct cmd_t                   /* compiled-command representation */
+{
+       char    *addr1;                 /* first address for command (NULL if none) */
+       char    *addr2;                 /* second address for command (NULL if none) */
+       union
+       {
+               char            *lhs;   /* s command lhs; also the text of a/i/c/r
+                                          and the table built for y */
+               struct cmd_t    *link;  /* label link: branch target or next
+                                          command in a label's pending chain */
+       } u;
+       char    command;                /* command code (one of the *CMD values) */
+       char    *rhs;                   /* s command replacement string */
+       FILE    *fout;                  /* associated output stream (w, W, s///w) */
+       struct
+       {
+               unsigned allbut  : 1;   /* was negation specified? */
+               unsigned global  : 1;   /* was g postfix (or -g) specified? */
+               unsigned print   : 2;   /* s postfix: 1 for p, 2 for P, else 0 */
+               unsigned inrange : 1;   /* in an address range? */
+       } flags;
+       unsigned nth;                   /* s command: numeric postfix (nth occurrence) */
+}
+sedcmd;                /* use this name for declarations */
+
+#define BAD    ((char *) -1)           /* guaranteed not a string ptr */
+
+/* address and regular expression compiled-form markers */
+#define STAR   1       /* marker for Kleene star */
+#define CCHR   2       /* non-newline character to be matched follows */
+#define CDOT   4       /* dot wild-card marker */
+#define CCL    6       /* character class follows */
+#define CNL    8       /* match line start */
+#define CDOL   10      /* match line end */
+#define CBRA   12      /* tagged pattern start marker */
+#define CKET   14      /* tagged pattern end marker */
+#define CBACK  16      /* backslash-digit pair marker */
+#define CLNUM  18      /* numeric-address index follows */
+#define CEND   20      /* symbol for end-of-source */
+#define CEOF   22      /* end-of-field mark */
+
+#define bits(b) (1 << (b))
+
+/* sed.h ends here */
diff --git a/commands/sed/sedcomp.c b/commands/sed/sedcomp.c
new file mode 100644 (file)
index 0000000..95eb8c5
--- /dev/null
@@ -0,0 +1,956 @@
+/* sedcomp.c -- stream editor main and compilation phase
+   Copyright (C) 1995-2003 Eric S. Raymond
+   Copyright (C) 2004-2006 Rene Rebe
+
+   The stream editor compiles its command input  (from files or -e options)
+into an internal form using compile() then executes the compiled form using
+execute(). Main() just initializes data structures, interprets command line
+options, and calls compile() and execute() in appropriate sequence.
+   The data structure produced by compile() is an array of compiled-command
+structures (type sedcmd).  These contain several pointers into pool[], the
+regular-expression and text-data pool, plus a command code and g & p flags.
+In the special case that the command is a label the struct  will hold a ptr
+into the labels array labels[] during most of the compile,  until resolve()
+resolves references at the end.
+   The operation of execute() is described in its source module. 
+*/
+
+#include <stdlib.h>            /* exit */
+#include <stdio.h>             /* uses getc, fprintf, fopen, fclose */
+#include <ctype.h>             /* isdigit */
+#include <string.h>            /* strcmp */
+#include "sed.h"               /* command type struct and name defines */
+
+/***** public stuff ******/
+
+#define MAXCMDS                200     /* maximum number of compiled commands */
+#define MAXLINES       256     /* max # numeric addresses to compile */ 
+
+/* main data areas */
+char   linebuf[MAXBUF+1];      /* current-line buffer */
+sedcmd cmds[MAXCMDS+1];        /* hold compiled commands */
+long   linenum[MAXLINES];      /* numeric-addresses table */
+
+/* miscellaneous shared variables */ 
+int    nflag;                  /* -n option flag */
+int    eargc;                  /* scratch copy of argument count */
+sedcmd *pending        = NULL; /* next command to be executed */
+
+int    last_line_used = 0;     /* last line address ($) was used */
+
+/* Print a fatal diagnostic on stderr and terminate with exit status 2.
+   msg is a printf-style format; linebuf (the command line currently being
+   compiled) is supplied as its one argument. */
+void die (const char* msg) {
+       fputs("sed: ", stderr);
+       fprintf(stderr, msg, linebuf);
+       fputc('\n', stderr);
+       exit(2);
+}
+
+/***** module common stuff *****/
+
+#define POOLSIZE       10000   /* size of string-pool space */
+#define WFILES         10      /* max # w output files that can be compiled */
+#define        RELIMIT         256     /* max chars in compiled RE */
+#define        MAXDEPTH        20      /* maximum {}-nesting level */
+#define        MAXLABS         50      /* max # of labels that can be handled */
+
+#define SKIPWS(pc)     while ((*pc==' ') || (*pc=='\t')) pc++
+#define IFEQ(x, v)     if (*x == v) x++ , /* do expression */
+
+/* error messages */
+static char    AGMSG[] = "garbled address %s";
+static char    CGMSG[] = "garbled command %s";
+static char    TMTXT[] = "too much text: %s";
+static char    AD1NG[] = "no addresses allowed for %s";
+static char    AD2NG[] = "only one address allowed for %s";
+static char    TMCDS[] = "too many commands, last was %s";
+static char    COCFI[] = "cannot open command-file %s";
+static char    UFLAG[] = "unknown flag %c";
+/*static char  COOFI[] = "cannot open %s";*/
+static char    CCOFI[] = "cannot create %s";
+static char    ULABL[] = "undefined label %s";
+static char    TMLBR[] = "too many {'s";
+static char    FRENL[] = "first RE must be non-null";
+static char    NSCAX[] = "no such command as %s";
+static char    TMRBR[] = "too many }'s";
+static char    DLABL[] = "duplicate label %s";
+static char    TMLAB[] = "too many labels: %s";
+static char    TMWFI[] = "too many w files";
+static char    REITL[] = "RE too long: %s";
+static char    TMLNR[] = "too many line numbers";
+static char    TRAIL[] = "command \"%s\" has trailing garbage";
+static char    RETER[] = "RE not terminated: %s";
+static char    CCERR[] = "unknown character class: %s";
+
+/* character class table: each class name is followed by the list of
+   characters/ranges (bracket-expression syntax) it stands for; the table
+   is terminated by a NULL pair.
+   cntrl is 0x01-0x1f plus DEL (0x7f); NUL is left out because it cannot
+   appear inside a C string. */
+const char* cclasses[] = {
+       "alnum", "a-zA-Z0-9",
+       "lower", "a-z",
+       "space", " \f\n\r\t\v",
+       "alpha", "a-zA-Z",
+       "digit", "0-9",
+       "upper", "A-Z",
+       "blank", " \t",
+       "xdigit", "0-9A-Fa-f",
+       "cntrl", "\x01-\x1f\x7f",
+       "print", " -\x7e",
+       "graph", "!-\x7e",
+       "punct", "!-/:-@[-`{-\x7e",
+       NULL, NULL};
+typedef struct                 /* represent a command label */
+{
+       char            *name;          /* the label name */
+       sedcmd          *last;          /* first branch cmd that referenced the
+                                          label before its definition; further
+                                          ones are chained through u.link */  
+       sedcmd          *address;       /* pointer to the cmd it labels; NULL
+                                          while the label is still undefined */
+} label;
+
+/* label handling */
+static label   labels[MAXLABS];        /* here's the label table */
+static label   *lab    = labels + 1;   /* pointer to current label */
+static label   *lablst = labels;       /* header for search list */
+
+/* string pool for regular expressions, append text, etc. etc. */
+static char    pool[POOLSIZE];                 /* the pool */
+static char    *fp     = pool;                 /* current pool pointer */
+static char    *poolend = pool + POOLSIZE;     /* pointer past pool end */
+
+/* compilation state */
+static FILE    *cmdf   = NULL;         /* current command source */
+static char    *cp     = linebuf;      /* compile pointer */
+static sedcmd  *cmdp   = cmds;         /* current compiled-cmd ptr */
+static char    *lastre = NULL;         /* old RE pointer */
+static int     bdepth  = 0;            /* current {}-nesting level */
+static int     bcount  = 0;            /* # tagged patterns in current RE */
+static char    **eargv;                /* scratch copy of argument list */
+
+/* compilation flags */
+static int     eflag;                  /* -e option flag */
+static int     gflag;                  /* -g option flag */
+
+/* prototypes */
+static char *address(char *expbuf);
+static char *gettext(char* txp);
+static char *recomp(char *expbuf, char redelim);
+static char *rhscomp(char* rhsp, char delim);
+static char *ycomp(char *ep, char delim);
+static int cmdcomp(char cchar);
+static int cmdline(char        *cbuf);
+static label *search(label *ptr);
+static void compile(void);
+static void resolve(void);
+
+/* sedexec.c prototypes */
+void execute(char* file);
+
+/* main sequence of the stream editor: interpret the command-line options,
+   compile the script(s), resolve branch labels, then run the compiled
+   program over every remaining file argument (or once with a NULL file
+   name when none remain).  Exits with 2 on a usage or compile error. */
+int main(int argc, char *argv[])
+{
+       eargc   = argc;         /* set local copy of argument count */
+       eargv   = argv;         /* set local copy of argument list */
+       cmdp->addr1 = pool;     /* 1st addr expand will be at pool start */
+       if (eargc == 1)
+               exit(0);        /* exit immediately if no arguments */
+
+       /* scan through the arguments, interpreting each one */
+       while ((--eargc > 0) && (**++eargv == '-'))
+               switch (eargv[0][1])
+               {
+               case 'e':
+                       eflag++; compile();     /* compile with e flag on */
+                       eflag = 0;
+                       continue;               /* get another argument */
+               case 'f':
+                       /* eargc still counts the -f itself; pre-decrement so
+                          a trailing -f with no file name is rejected here
+                          instead of handing argv[argc] (NULL) to fopen() */
+                       if (--eargc <= 0)       /* barf if no -f file */
+                               exit(2);
+                       if ((cmdf = fopen(*++eargv, "r")) == NULL)
+                       {
+                               fprintf(stderr, COCFI, *eargv);
+                               fputc('\n', stderr);
+                               exit(2);
+                       }
+                       compile();      /* file is O.K., compile it */
+                       fclose(cmdf);
+                       continue;       /* go back for another argument */
+               case 'g':
+                       gflag++;        /* set global flag on all s cmds */
+                       continue;
+               case 'n':
+                       nflag++;        /* no print except on p flag or w */
+                       continue;
+               default:
+                       /* a diagnostic must not end up in the edited output,
+                          so report on stderr and terminate the line */
+                       fprintf(stderr, UFLAG, eargv[0][1]);
+                       fputc('\n', stderr);
+                       continue;
+               }
+
+       if (cmdp == cmds)       /* no commands have been compiled */
+       {
+               /* treat the first non-option argument as the script:
+                  step back one argument and compile it like -e */
+               eargv--; eargc++;
+               eflag++; compile(); eflag = 0;
+               eargv++; eargc--;
+       }
+
+       if (bdepth)     /* we have unbalanced squigglies */
+               die(TMLBR);
+
+       lablst->address = cmdp; /* set up header of label linked list */
+       resolve();              /* resolve label table indirections */
+       if (eargc <= 0)         /* if no file arguments are left */
+               execute(NULL);  /*   run once without a file name */
+       else while(--eargc>=0)  /* else run on each file argument in turn */
+               execute(*eargv++);
+       exit(0);                /* everything was O.K. if we got here */
+}
+
+#define        H       0x80    /* 128 bit, on if there's really code for command */
+#define LOWCMD 56      /* = '8', lowest char indexed in cmdmask */ 
+
+/* indirect through this to get command internal code, if it exists;
+   indexed by (command char - LOWCMD), eight characters per row.
+   0 means no such command; the H bit means cmdcomp() has compile-time
+   work to do for the command, the remaining bits are the command code */
+static char    cmdmask[] =
+{
+       0,      0,      H,      0,      0,      H+EQCMD,0,      0,      /* 8 9 : ; < = > ? */
+       0,      0,      0,      0,      H+CDCMD,0,      0,      CGCMD,  /* @ A B C D E F G */
+       CHCMD,  0,      0,      0,      H+CLCMD,0,      CNCMD,  0,      /* H I J K L M N O */
+       CPCMD,  0,      0,      0,      H+CTCMD,0,      0,      H+CWCMD,        /* P Q R S T U V W */
+       0,      0,      0,      0,      0,      0,      0,      0,      /* X Y Z [ \ ] ^ _ */
+       0,      H+ACMD, H+BCMD, H+CCMD, DCMD,   0,      0,      GCMD,   /* ` a b c d e f g */
+       HCMD,   H+ICMD, 0,      0,      H+LCMD, 0,      NCMD,   0,      /* h i j k l m n o */
+       PCMD,   H+QCMD, H+RCMD, H+SCMD, H+TCMD, 0,      0,      H+WCMD, /* p q r s t u v w */
+       XCMD,   H+YCMD, 0,      H+BCMD, 0,      H,      0,      0,      /* x y z { | } ~ DEL */
+};
+
+/* precompile sed commands from the current command source into cmds[]:
+   fetches lines into linebuf through cmdline() until it returns a negative
+   value and, for each command, compiles up to two addresses into the pool,
+   notes an optional '!' and looks the command character up in cmdmask[].
+   Any error is fatal and reported through die(). */
+static void compile(void)
+{
+       char    ccode;
+
+       for(;;)                                 /* main compilation loop */
+       {
+               SKIPWS(cp);
+               if (*cp == ';') {               /* skip one command separator */
+                       cp++;
+                       SKIPWS(cp);
+               }
+
+               if (*cp == '\0' || *cp == '#')  /* get a new command line */
+                       if (cmdline(cp = linebuf) < 0)
+                               break;          /* no more input: done */
+               SKIPWS(cp);
+
+               if (*cp == '\0' || *cp == '#')  /* empty line or a comment */
+                       continue;
+
+               /* compile first address */
+               if (fp > poolend)
+                       die(TMTXT);
+               else if ((fp = address(cmdp->addr1 = fp)) == BAD)
+                       die(AGMSG);
+
+               if (fp == cmdp->addr1)          /* if empty RE was found */
+               {
+                       if (lastre)             /* if there was previous RE */
+                               cmdp->addr1 = lastre;   /* use it */
+                       else
+                               die(FRENL);
+               }
+               else if (fp == NULL)            /* address() returned NULL --
+                                                  presumably no address was
+                                                  given; confirm in address() */
+               {
+                       fp = cmdp->addr1;       /* use current pool location */
+                       cmdp->addr1 = NULL;
+               }
+               else
+               {
+                       lastre = cmdp->addr1;
+                       if (*cp == ',' || *cp == ';')   /* there's 2nd addr */
+                       {
+                               cp++;
+                               if (fp > poolend) die(TMTXT);
+                               fp = address(cmdp->addr2 = fp);
+                               if (fp == BAD || fp == NULL) die(AGMSG);
+                               if (fp == cmdp->addr2)  /* empty: reuse last RE */
+                                       cmdp->addr2 = lastre;
+                               else
+                                       lastre = cmdp->addr2;
+                       }
+                       else
+                               cmdp->addr2 = NULL;     /* no 2nd address */
+               }
+               if (fp > poolend) die(TMTXT);
+
+               SKIPWS(cp);             /* discard whitespace after address */
+
+               if (*cp == '!') {       /* negate the address selection */
+                       cmdp->flags.allbut = 1;
+                       cp++; SKIPWS(cp);
+               }
+
+               /* get cmd char, range-check it */
+               if ((*cp < LOWCMD) || (*cp > '~')
+                       || ((ccode = cmdmask[*cp - LOWCMD]) == 0))
+                               die(NSCAX);
+
+               cmdp->command = ccode & ~H;     /* fill in command value */
+               if ((ccode & H) == 0)           /* if no compile-time code */
+                       cp++;                   /* discard command char */
+               else if (cmdcomp(*cp++))        /* execute it; nonzero return: */
+                       continue;               /* skip the cmdp advance and
+                                                  trailing-text check below */
+
+               if (++cmdp >= cmds + MAXCMDS) die(TMCDS);
+
+               SKIPWS(cp);                     /* look for trailing stuff */
+               if (*cp != '\0')
+               {
+                       if (*cp == ';')         /* another command follows */
+                       {
+                               continue;
+                       }
+                       else if (*cp != '#' && *cp != '}')
+                               die(TRAIL);
+               }
+       }
+}
+
+/* compile a single command
+   cchar:      the command character; cp already points just past it
+   Works on the current command slot *cmdp and on the source text at cp.
+   Returns 1 for '{', '}' and ':' -- the caller must then neither advance
+   cmdp nor check for trailing text -- and 0 for every other command.
+   Errors are fatal and reported through die(). */
+static int cmdcomp(char cchar)
+{
+       static sedcmd   **cmpstk[MAXDEPTH];     /* current cmd stack for {} */
+       static const char *fname[WFILES];       /* w file name pointers */
+       static FILE     *fout[WFILES];          /* w file file ptrs */
+       static int      nwfiles = 2;            /* count of open w files */
+       int             i;                      /* indexing dummy used in w */
+       sedcmd          *sp1, *sp2;             /* temps for label searches */
+       label           *lpt;                   /* ditto, and the searcher */
+       char            redelim;                /* current RE delimiter */
+
+       /* slots 0 and 1 are reserved so that writing to /dev/stdout or
+          /dev/stderr reuses the already open standard streams */
+       fout[0] = stdout;
+       fout[1] = stderr;
+
+       fname[0] = "/dev/stdout";
+       fname[1] = "/dev/stderr";
+
+       switch(cchar)
+       {
+       case '{':       /* start command group */
+               /* cmpstk[] has only MAXDEPTH slots; refuse deeper nesting
+                  instead of writing past the end of the array */
+               if (bdepth >= MAXDEPTH) die(TMLBR);
+               cmdp->flags.allbut = !cmdp->flags.allbut;
+               cmpstk[bdepth++] = &(cmdp->u.link);
+               if (++cmdp >= cmds + MAXCMDS) die(TMCDS);
+               if (*cp == '\0') *cp++ = ';', *cp = '\0';       /* get next cmd w/o lineread */
+               return(1);
+
+       case '}':       /* end command group */
+               if (cmdp->addr1) die(AD1NG);    /* no addresses allowed */
+               if (--bdepth < 0) die(TMRBR);   /* too many right braces */
+               *cmpstk[bdepth] = cmdp;         /* set the jump address */
+               return(1);
+
+       case '=':                       /* print current source line number */
+       case 'q':                       /* exit the stream editor */
+               if (cmdp->addr2) die(AD2NG);
+               break;
+
+       case ':':       /* label declaration */
+               if (cmdp->addr1) die(AD1NG);    /* no addresses allowed */
+               fp = gettext(lab->name = fp);   /* get the label name */
+               if ((lpt = search(lab)))        /* does it have a double? */
+               {
+                       if (lpt->address) die(DLABL);   /* yes, abort */
+               }
+               else    /* check that it doesn't overflow label table */
+               {
+                       lab->last = NULL;
+                       lpt = lab;
+                       if (++lab >= labels + MAXLABS) die(TMLAB);
+               }
+               lpt->address = cmdp;
+               return(1);
+
+       case 'b':       /* branch command */
+       case 't':       /* branch-on-succeed command */
+       case 'T':       /* branch-on-fail command */
+               SKIPWS(cp);
+               if (*cp == '\0')        /* no label given on the branch */
+               {
+                       /* add current command to the chain hanging off the
+                          list header (lablst) */
+                       if ((sp1 = lablst->last))
+                       {
+                               while((sp2 = sp1->u.link))
+                                       sp1 = sp2;
+                               sp1->u.link = cmdp;
+                       }
+                       else    /* lablst->last == NULL */
+                               lablst->last = cmdp;
+                       break;
+               }
+               fp = gettext(lab->name = fp);   /* else get label into pool */
+               if ((lpt = search(lab)))        /* enter branch to it */
+               {
+                       if (lpt->address)       /* label already defined */
+                               cmdp->u.link = lpt->address;
+                       else                    /* still pending: chain up */
+                       {
+                               sp1 = lpt->last;
+                               while((sp2 = sp1->u.link))
+                                       sp1 = sp2;
+                               sp1->u.link = cmdp;
+                       }
+               }
+               else            /* matching named label not found */
+               {
+                       lab->last = cmdp;       /* add the new label */
+                       lab->address = NULL;    /* it's forward of here */
+                       if (++lab >= labels + MAXLABS)  /* overflow if last */
+                               die(TMLAB);
+               }
+               break;
+
+       case 'a':       /* append text */
+       case 'i':       /* insert text */
+       case 'r':       /* read file into stream */
+               if (cmdp->addr2) die(AD2NG);
+               /* fallthrough */
+       case 'c':       /* change text */
+               if ((*cp == '\\') && (*++cp == '\n')) cp++;
+               fp = gettext(cmdp->u.lhs = fp);
+               break;
+
+       case 'D':       /* delete current line in hold space */
+               cmdp->u.link = cmds;
+               break;
+
+       case 's':       /* substitute regular expression */
+               if (*cp == 0) /* get delimiter from 1st ch */
+                       die(RETER);
+               else
+                       redelim = *cp++;
+
+               if ((fp = recomp(cmdp->u.lhs = fp, redelim)) == BAD)
+                       die(CGMSG);
+               if (fp == cmdp->u.lhs) {        /* if compiled RE zero len */
+                       if (lastre) {
+                               cmdp->u.lhs = lastre;   /* use the previous one */
+                               cp++;                   /*   skip delim */
+                       }
+                       else
+                               die(FRENL);
+               }
+               else                            /* otherwise */
+                       lastre = cmdp->u.lhs;   /*   save the one just found */
+
+               if ((cmdp->rhs = fp) > poolend) die(TMTXT);
+               if ((fp = rhscomp(cmdp->rhs, redelim)) == BAD) die(CGMSG);
+               /* global is a 1-bit field: assign rather than increment, so
+                  that -g combined with a g suffix (or a repeated g) cannot
+                  wrap it back to 0 */
+               if (gflag) cmdp->flags.global = 1;
+               while (*cp == 'g' || *cp == 'p' || *cp == 'P'
+                       || isdigit((unsigned char) *cp))
+               {
+                       IFEQ(cp, 'g') cmdp->flags.global = 1;
+                       IFEQ(cp, 'p') cmdp->flags.print = 1;
+                       IFEQ(cp, 'P') cmdp->flags.print = 2;
+                       if (isdigit((unsigned char) *cp))
+                       {
+                               if (cmdp->nth)
+                                       break; /* no multiple n args */
+
+                               cmdp->nth = atoi(cp); /* check 0? */
+                               while (isdigit((unsigned char) *cp)) cp++;
+                       }
+               }
+               /* fallthrough: s may carry a trailing w flag */
+
+       case 'l':       /* list pattern space */
+       case 'L':       /* dump pattern space */
+               if (*cp == 'w')
+                       cp++;           /* and execute a w command! */
+               else
+                       break;          /* s or L or l is done */
+               /* fallthrough */
+
+       case 'w':       /* write-pattern-space command */
+       case 'W':       /* write-first-line command */
+               if (nwfiles >= WFILES) die(TMWFI);
+               fname[nwfiles] = fp;    /* filename will be in pool */
+               fp = gettext(fp);
+               for(i = nwfiles-1; i >= 0; i--) /* match it in table */
+                       if (strcmp(fname[nwfiles], fname[i]) == 0)
+                       {
+                               cmdp->fout = fout[i];   /* share the open stream */
+                               return(0);
+                       }
+               /* if didn't find one, open new out file */
+               if ((cmdp->fout = fopen(fname[nwfiles], "w")) == NULL)
+               {
+                       fprintf(stderr, CCOFI, fname[nwfiles]);
+                       exit(2);
+               }
+               fout[nwfiles++] = cmdp->fout;
+               break;
+
+       case 'y':       /* transliterate text */
+               fp = ycomp(cmdp->u.lhs = fp, *cp++);    /* compile translit */
+               if (fp == BAD) die(CGMSG);              /* fail on bad form */
+               if (fp > poolend) die(TMTXT);           /* fail on overflow */
+               break;
+       }
+       return(0);      /* succeeded in interpreting one command */
+}
+
+/* generate replacement string for substitute command right hand side
+   rhsp:       place to compile expression to
+   delim:      regular-expression end-mark to look for
+   Reads the source text starting at cp.  On success the compiled string
+   is NUL-terminated, cp is moved just past the delimiter and the return
+   value points one past the terminator.  Returns BAD, leaving cp alone,
+   when a back reference exceeds bcount or the source text ends before
+   the delimiter is seen. */
+static char *rhscomp(char* rhsp, char delim)   /* uses bcount */
+{
+       register char   *p = cp;
+
+       for(;;)
+               /* copy for the likely case it is not s.th. special */
+               if ((*rhsp = *p++) == '\\') /* back reference or escape */
+               {
+                       if (*p >= '0' && *p <= '9') /* back reference */
+                       {
+                       dobackref:      /* also entered from the '&' case below */
+                               *rhsp = *p++;
+                               /* check validity of pattern tag */
+                               if (*rhsp > bcount + '0')
+                                       return(BAD);
+                               *rhsp++ |= 0x80; /* mark the good ones: the digit
+                                                   is stored with bit 7 set */
+                       }
+                       else /* escape */
+                       {
+                               switch (*p) {
+                                       case 'n': *rhsp = '\n'; break;
+                                       case 'r': *rhsp = '\r'; break;
+                                       case 't': *rhsp = '\t'; break;
+                                       default: *rhsp = *p;    /* taken literally */
+                               }
+                               rhsp++; p++;
+                       }
+               }
+               else if (*rhsp == delim)        /* found RE end, hooray... */
+               {
+                       *rhsp++ = '\0';         /* cap the expression string */
+                       cp = p;
+                       return(rhsp);           /* pt at 1 past the RE */
+               }
+               else if (*rhsp == '&')          /* special case, convert to backref \0 */
+               {
+                       *--p = '0';             /* overwrites the '&' in the source text */
+                       goto dobackref;
+               }
+               else if (*rhsp++ == '\0')       /* last ch not RE end, help! */
+                       return(BAD);
+}
+
+/* compile a regular expression to internal form
+   expbuf:     place to compile it to
+   redelim:    RE end-marker to look for
+   returns:    ptr just past the CEOF that ends the compiled RE; expbuf itself
+               (nothing compiled, cp not moved) when the RE is empty; or BAD
+               for a malformed or oversized RE.  Some errors call die()
+               instead of returning.
+   The first compiled byte is 1 if the RE started with ^, else 0. */
+static char *recomp(char *expbuf, char redelim)        /* uses cp, bcount */
+{
+       register char   *ep = expbuf;   /* current-compiled-char pointer */
+       register char   *sp = cp;       /* source-character ptr */
+       register int    c;              /* current-character pointer */
+       char            negclass;       /* all-but flag */
+       char            *lastep;        /* ptr to last expr compiled */
+       char            *lastep2;       /* ditto, but from the last loop */
+       char            *svclass;       /* start of current char class */
+       char            brnest[MAXTAGS];        /* bracket-nesting array */
+       char            *brnestp;       /* ptr to current bracket-nest */
+       char            *pp;            /* scratch pointer */
+       int             classct;        /* class element count */
+       int             tags;           /* # of closed tags */
+
+       if (*cp == redelim) {           /* if first char is RE endmarker */
+           return(ep);
+       }
+
+       lastep = lastep2 = NULL;        /* there's no previous RE */
+       brnestp = brnest;               /* initialize ptr to brnest array */
+       tags = bcount = 0;              /* initialize counters */
+
+       if ((*ep++ = (*sp == '^')))     /* check for start-of-line syntax */
+               sp++;
+
+       for (;;)
+       {
+               if (*sp == 0) /* no termination */
+                       die (RETER);
+               if (ep >= expbuf + RELIMIT)     /* match is too large */
+                       return(cp = sp, BAD);
+               if ((c = *sp++) == redelim)     /* found the end of the RE */
+               {
+                       cp = sp;
+                       if (brnestp != brnest)  /* \(, \) unbalanced */
+                               return(BAD);
+                       *ep++ = CEOF;           /* write end-of-pattern mark */
+                       return(ep);             /* return ptr to compiled RE */
+               }
+
+               /* lastep = element compiled by the previous iteration,
+                  lastep2 = where this iteration's element will start */
+               lastep = lastep2;
+               lastep2 = ep;
+
+               switch (c)
+               {
+               case '\\':
+                       if ((c = *sp++) == '(') /* start tagged section */
+                       {
+                               if (bcount >= MAXTAGS)
+                                       return(cp = sp, BAD);
+                               *brnestp++ = bcount;    /* update tag stack */
+                               *ep++ = CBRA;           /* enter tag-start */
+                               *ep++ = bcount++;       /* bump tag count */
+                               lastep2 = NULL;
+                               continue;
+                       }
+                       else if (c == ')')      /* end tagged section */
+                       {
+                               if (brnestp <= brnest)  /* extra \) */
+                                       return(cp = sp, BAD);
+                               *ep++ = CKET;           /* enter end-of-tag */
+                               *ep++ = *--brnestp;     /* pop tag stack */
+                               tags++;                 /* count closed tags */
+                               for (lastep2 = ep-1; *lastep2 != CBRA; )
+                                       --lastep2; /* FIXME: lastep becomes start */
+                               continue;
+                       }
+                       else if (c >= '1' && c <= '9' && c != redelim)  /* tag use, if !delim */
+                       {
+                               if ((c -= '1') >= tags) /* too few */
+                                       return(BAD);
+                               *ep++ = CBACK;          /* enter tag mark */
+                               *ep++ = c;              /* and the number */
+                               continue;
+                       }
+                       else if (c == '\n')     /* escaped newline no good */
+                               return(cp = sp, BAD);
+                       else if (c == 'n')              /* match a newline */
+                               c = '\n';
+                       else if (c == 't')              /* match a tab */
+                               c = '\t';
+                       else if (c == 'r')              /* match a return */
+                               c = '\r';
+                       else if (c == '+') /* 1..n repeat of previous pattern */
+                       {
+                         if (lastep == NULL)   /* if + not first on line */
+                               goto defchar;   /*   match a literal + */
+                         pp = ep;              /* else save old ep */
+                         *ep++ = *lastep++ | STAR;     /* flag the copy */
+                         while (lastep < pp)   /* so we can blt the pattern */
+                               *ep++ = *lastep++;
+                         lastep2 = lastep;       /* no new expression */
+                         continue;
+                       }
+                       goto defchar;           /* else match \c */
+
+               case '\0':      /* ignore nuls */
+                       continue;
+
+               case '\n':      /* trailing pattern delimiter is missing */
+                       return(cp = sp, BAD);
+
+               case '.':       /* match any char except newline */
+                       *ep++ = CDOT;
+                       continue;
+
+               case '*':       /* 0..n repeat of previous pattern */
+                       if (lastep == NULL)     /* if * isn't first on line */
+                               goto defchar;   /*   match a literal * */
+                       *lastep |= STAR;        /* flag previous pattern */
+                       lastep2 = lastep;       /* no new expression */
+                       continue;
+
+               case '$':       /* match only end-of-line */
+                       if (*sp != redelim)     /* if we're not at end of RE */
+                               goto defchar;   /*   match a literal $ */
+                       *ep++ = CDOL;           /* insert end-symbol mark */
+                       continue;
+
+               case '[':       /* begin character set pattern */
+                       if (ep + 17 >= expbuf + RELIMIT)
+                               die(REITL);
+                       *ep++ = CCL;            /* insert class mark */
+                       if ((negclass = ((c = *sp++) == '^')))
+                               c = *sp++;
+                       svclass = sp;           /* save ptr to class start */
+                       do {
+                               if (c == '\0') die(CGMSG);
+                               /* handle predefined character classes */
+                               if (c == '[' && *sp == ':')
+                               {
+                                 /* look for the matching ":]]" */
+                                 char *p;
+                                 const char *p2;
+                                 for (p = sp+3; *p; p++)
+                                   if  (*p == ']' &&
+                                        *(p-1) == ']' &&
+                                        *(p-2) == ':')
+                                       {
+                                         char cc[8];
+                                         const char **it;
+                                         /* copy the class name, leaving room
+                                            for the terminator: an over-long
+                                            name is truncated (and then fails
+                                            the lookup) instead of writing
+                                            past the end of cc[] */
+                                         for (p2 = sp+1;
+                                              p2 < p-2 &&
+                                              (size_t)(p2-sp-1) < sizeof(cc)-1;
+                                              p2++)
+                                           cc[p2-sp-1] = *p2;
+                                         cc[p2-sp-1] = 0; /* termination */
+
+                                         /* cclasses holds name/member-list
+                                            pairs, ended by a NULL name */
+                                         it = cclasses;
+                                         while (*it && strcmp(*it, cc))
+                                               it +=2;
+                                         if (!*it++)
+                                           die(CCERR);
+
+                                         /* generate mask */
+                                         p2 = *it;
+                                         while (*p2) {
+                                           if (p2[1] == '-' && p2[2]) {
+                                               for (c = *p2; c <= p2[2]; c++)
+                                                 ep[c >> 3] |= bits(c & 7);
+                                               p2 += 3;
+                                           }
+                                           else {
+                                               c = *p2++;
+                                               ep[c >> 3] |= bits(c & 7);
+                                           }
+                                         }
+                                         sp = p; c = 0; break;
+                                       }
+                               }
+
+                               /* handle character ranges */
+                               if (c == '-' && sp > svclass && *sp != ']')
+                                       for (c = sp[-2]; c < *sp; c++)
+                                               ep[c >> 3] |= bits(c & 7);
+
+                               /* handle escape sequences in sets */
+                               if (c == '\\')
+                               {
+                                       if ((c = *sp++) == 'n')
+                                               c = '\n';
+                                       else if (c == 't')
+                                               c = '\t';
+                                       else if (c == 'r')
+                                               c = '\r';
+                               }
+
+                               /* enter (possibly translated) char in set */
+                               if (c)
+                                       ep[c >> 3] |= bits(c & 7);
+                       } while
+                               ((c = *sp++) != ']');
+
+                       /* invert the bitmask if all-but was specified */
+                       if (negclass)
+                               for(classct = 0; classct < 16; classct++)
+                                       ep[classct] ^= 0xFF;
+                       ep[0] &= 0xFE;          /* never match ASCII 0 */
+                       ep += 16;               /* advance ep past set mask */
+                       continue;
+
+               defchar:        /* match literal character */
+               default:        /* which is what we'd do by default */
+                       *ep++ = CCHR;           /* insert character mark */
+                       *ep++ = c;
+               }
+       }
+}
+
+/* read next command from -e argument or command file
+   cbuf:       buffer that receives the NUL-terminated command line
+   returns:    1 when a line was stored, -1 when there is nothing more to
+               read (or an -e text ends in a dangling backslash) */
+static int cmdline(char        *cbuf)          /* uses eflag, eargc, cmdf */
+{
+       register int    inc;    /* not char because must hold EOF */
+
+       cbuf--;                 /* so pre-increment points us at cbuf */
+
+       /* e command flag is on */
+       if (eflag)
+       {
+               register char   *p;     /* ptr to current -e argument */
+               static char     *savep; /* saves previous value of p */
+
+               if (eflag > 0)  /* there are pending -e arguments */
+               {
+                       eflag = -1;
+                       if (eargc-- <= 0)
+                               exit(2);        /* if no arguments, barf */
+                       p = *++eargv;   /* start on the next e argument */
+               }
+               else if ((p = savep) == NULL)   /* no text left over */
+                       return(-1);
+
+               /* transcribe up to the next unescaped newline into cbuf;
+                  one loop serves both a fresh argument and the remainder
+                  of a multi-line one, so the end-of-text test is the same */
+               while((*++cbuf = *p++))
+                       if (*cbuf == '\\')
+                       {
+                               /* keep the escaped char as is; a NUL here
+                                  means the text ended in a lone backslash */
+                               if ((*++cbuf = *p++) == '\0')
+                                       return(savep = NULL, -1);
+                       }
+                       else if (*cbuf == '\n') /* end of 1 cmd line */
+                       {
+                               *cbuf = '\0';
+                               return(savep = p, 1);
+                               /* we'll be back for the rest... */
+                       }
+
+               /* found end-of-string; can advance to next argument */
+               return(savep = NULL, 1);
+       }
+
+       /* if no -e flag read from command file descriptor */
+       while((inc = getc(cmdf)) != EOF)                /* get next char */
+               if ((*++cbuf = inc) == '\\')            /* if it's escape */
+                       *++cbuf = inc = getc(cmdf);     /* get next char */
+               else if (*cbuf == '\n')                 /* end on newline */
+                       return(*cbuf = '\0', 1);        /* cap the string */
+
+       return(*++cbuf = '\0', -1);     /* end-of-file, no more chars */
+}
+
+/* compile the address found at *cp... into expbuf
+   expbuf:     where the compiled address is written
+   returns:    ptr just past the compiled address, whatever recomp() returns
+               for a /RE/ address, or NULL if no address starts at cp */
+static char *address(char *expbuf)             /* uses cp, linenum */
+{
+       static int      numl = 0;       /* next free slot in linenum[] */
+       char            *scan;          /* look-ahead over the digits */
+       long            value = 0;      /* numeric address being built */
+
+       switch (*cp)
+       {
+       case '$':                       /* last-line address */
+               expbuf[0] = CEND;       /* symbolic end address... */
+               expbuf[1] = CEOF;       /* ...and its end-of-address mark */
+               cp++;
+               last_line_used = TRUE;
+               return(expbuf + 2);
+
+       case '/':                       /* regular-expression address */
+               return(recomp(expbuf, *cp++));
+       }
+
+       /* otherwise it can only be a decimal line number */
+       for (scan = cp; *scan >= '0' && *scan <= '9'; scan++)
+               value = value*10 + *scan - '0';
+
+       if (scan == cp)                 /* not even one digit */
+               return(NULL);
+
+       expbuf[0] = CLNUM;              /* numeric-address marker */
+       expbuf[1] = numl;               /* index into the address table */
+       linenum[numl] = value;          /* the table holds the real number */
+       if (++numl >= MAXLINES)         /* address table is used up */
+               die(TMLNR);
+       expbuf[2] = CEOF;               /* end-of-address marker */
+       cp = scan;                      /* compile resumes after the digits */
+       return(expbuf + 3);
+}
+
+/* copy text from *cp... to txp, dropping the whitespace that leads each line
+   txp:        where to put the text
+   returns:    ptr one past the NUL stored at the end of the text; cp is
+               left pointing at the NUL that ended the source */
+static char *gettext(char* txp)                /* uses global cp */
+{
+       char    *src = cp;
+
+       SKIPWS(src);                    /* discard whitespace */
+       for (;; txp++)
+       {
+               *txp = *src++;
+               if (*txp == '\\')       /* escape: keep the next char as is */
+                       *txp = *src++;
+
+               if (*txp == '\0')       /* end of input */
+               {
+                       cp = src - 1;
+                       return(txp + 1);
+               }
+               if (*txp == '\n')       /* also SKIPWS after newline */
+               {
+                       SKIPWS(src);
+               }
+       }
+}
+
+/* look through the labels stored before ptr for one with the same name
+   ptr:        label to look up; only entries in front of it are examined
+   returns:    the first entry whose name equals ptr->name, else NULL */
+static label *search(label *ptr)               /* uses global lablst */
+{
+       label   *cand = lablst;
+
+       while (cand < ptr)
+       {
+               /* entries without a name can never match */
+               if (cand->name != NULL && strcmp(cand->name, ptr->name) == 0)
+                       return(cand);
+               cand++;
+       }
+       return(NULL);
+}
+
+/* patch every command that refers to a label with that label's address;
+   prints ULABL and exits with status 2 if a label has no address */
+static void resolve(void)                      /* uses global lablst */
+{
+       label           *lptr;
+       sedcmd          *ref, *next;
+
+       for (lptr = lablst; lptr < lab; lptr++)
+       {
+               if (lptr->address == NULL)      /* used but never defined */
+               {
+                       fprintf(stderr, ULABL, lptr->name);
+                       exit(2);
+               }
+               if (!lptr->last)                /* nothing refers to it */
+                       continue;
+
+               /* the referring commands are chained through u.link: pick
+                  up the next one before the link is overwritten */
+               for (ref = lptr->last; ; ref = next)
+               {
+                       next = ref->u.link;
+                       ref->u.link = lptr->address;
+                       if (!next)
+                               break;
+               }
+       }
+}
+
+/* compile a y (transliterate) command into a 128-entry mapping table
+   ep:         where to compile to; ep[c] receives the replacement for c
+   delim:      end delimiter to look for
+   returns:    ptr just past the table (ep + 0x80), or BAD if the command
+               is malformed; on success cp is moved past the closing delim */
+static char *ycomp(char *ep, char delim)
+{
+       char    *from = cp;     /* walks the 'from' section */
+       char    *to = cp;       /* walks the 'to' section */
+       int     ch;
+
+       /* skip over 'from' to find where 'to' begins, checking as we go */
+       while (*to != delim)
+       {
+               if (*to == '\\')
+                       to++;
+               if (*to == '\n' || *to == '\0')
+                       return(BAD);
+               to++;
+       }
+       to++;                   /* first char of the 'to' section */
+
+       /* pair each 'from' char with the corresponding 'to' char */
+       for (;;)
+       {
+               ch = *from++ & 0x7F;
+               if (ch == delim)
+                       break;
+
+               if (ch == '\\' && *from == 'n') /* \n in 'from' */
+               {
+                       from++;
+                       ch = '\n';
+               }
+
+               ep[ch] = *to++;
+               if (ep[ch] == '\\' && *to == 'n')       /* \n in 'to' */
+               {
+                       ep[ch] = '\n';
+                       to++;
+               }
+
+               if (ep[ch] == delim || ep[ch] == '\0')  /* 'to' ran out */
+                       return(BAD);
+       }
+
+       if (*to != delim)       /* 'to' is longer than 'from' */
+               return(BAD);
+
+       cp = to + 1;            /* compile resumes after the command */
+
+       /* entries that are still zero map a character to itself */
+       for (ch = 0; ch < 128; ch++)
+               if (!ep[ch])
+                       ep[ch] = ch;
+
+       return(&ep[0x80]);
+}
+
+/* sedcomp.c ends here */
diff --git a/commands/sed/sedexec.c b/commands/sed/sedexec.c
new file mode 100644 (file)
index 0000000..5088b70
--- /dev/null
@@ -0,0 +1,819 @@
+/* sedexec.c -- execute compiled form of stream editor commands
+   Copyright (C) 1995-2003 Eric S. Raymond
+   Copyright (C) 2004-2006 Rene Rebe
+
+   The single entry point of this module is the function execute(). It
+may take a string argument (the name of a file to be used as text)  or
+the argument NULL which tells it to filter standard input. It executes
+the compiled commands in cmds[] on each line in turn.
+   The function command() does most of the work.  match() and advance()
+are used for matching text against precompiled regular expressions and
+dosub() does right-hand-side substitution.  Getline() does text input;
+readout() and memcmp() are output and string-comparison utilities.  
+*/
+
+#include <stdlib.h>    /* exit */
+#include <stdio.h>     /* {f}puts, {f}printf, getc/putc, f{re}open, fclose */
+#include <ctype.h>     /* for isprint(), isdigit(), toascii() macros */
+#include <string.h>    /* for memcmp(3) */
+#include "sed.h"       /* command type structures & miscellaneous constants */
+
+/***** shared variables imported from the main ******/
+
+/* main data areas */
+extern char    linebuf[];      /* current-line buffer */
+extern sedcmd  cmds[];         /* hold compiled commands */
+extern long    linenum[];      /* numeric-addresses table */
+
+/* miscellaneous shared variables */
+extern int     nflag;          /* -n option flag */
+extern int     eargc;          /* scratch copy of argument count */
+extern sedcmd  *pending;       /* ptr to command waiting to be executed */
+
+extern int     last_line_used; /* last line address ($) used */
+
+/***** end of imported stuff *****/
+
+#define MAXHOLD                MAXBUF  /* size of the hold space */
+#define GENSIZ         MAXBUF  /* maximum genbuf size */
+
+static char LTLMSG[]   = "sed: line too long\n";
+
+static char    *spend;         /* current end-of-line-buffer pointer */
+static long    lnum = 0L;      /* current source line number */
+
+/* append buffer maintenance */
+static sedcmd  *appends[MAXAPPENDS];   /* array of ptrs to a,i,c commands */
+static sedcmd  **aptr = appends;       /* ptr to current append */
+
+/* genbuf and its pointers */
+static char    genbuf[GENSIZ];
+static char    *loc1;                  /* set by match(): where the match starts */
+static char    *loc2;                  /* set by advance() at CEOF: end of the match */
+static char    *locs;
+
+/* command-logic flags */
+static int     lastline;               /* do-line flag */
+static int     line_with_newline;      /* line had newline */
+static int     jump;                   /* jump to cmd's link address if set */
+static int     delete;                 /* delete command flag */
+static int     needs_advance;          /* needs inc after substitution */
+                                       /* ugly HACK - needs REWORK */
+
+/* tagged-pattern tracking */
+static char    *bracend[MAXTAGS];      /* tagged pattern end pointers */
+static char    *brastart[MAXTAGS];     /* tagged pattern start pointers */
+
+/* prototypes */
+static char *getline(char *buf, int max);
+static char *place(char* asp, char* al1, char* al2);
+static int advance(char* lp, char* ep, char** eob);
+static int match(char *expbuf, int gf);
+static int selected(sedcmd *ipc);
+static int substitute(sedcmd *ipc);
+static void command(sedcmd *ipc);
+static void dosub(char *rhsbuf);
+static void dumpto(char *p1, FILE *fp);
+static void listto(char *p1, FILE *fp);
+static void readout(void);
+static void truncated(int h);
+
+/* execute the compiled commands in cmds[] on a file
+   file:  name of text source file to be filtered; NULL leaves stdin as it is
+   Reads lines with getline() until it returns BAD, runs the selected
+   commands on each one and, unless -n or a delete suppresses it, writes
+   the resulting line to stdout. */
+void execute(char* file)
+{
+       register sedcmd         *ipc;           /* ptr to current command */
+       char                    *execp;         /* ptr to source */
+
+       /* NOTE(review): when freopen() fails only the message is printed;
+          control still falls through to the processing below */
+       if (file != NULL)       /* filter text from a named file */ 
+               if (freopen(file, "r", stdin) == NULL)
+                       fprintf(stderr, "sed: can't open %s\n", file);
+
+       if (pending)            /* there's a command waiting */
+       {
+               ipc = pending;          /* it will be first executed */
+               pending = FALSE;        /*   turn off the waiting flag */
+               goto doit;              /*   go to execute it immediately */
+       }
+
+       /* here's the main command-execution loop */
+       for(;;)
+       {
+               /* get next line to filter */
+               if ((execp = getline(linebuf, MAXBUF+1)) == BAD)
+                       return;
+               spend = execp;
+
+               /* loop through compiled commands, executing them */
+               for(ipc = cmds; ipc->command; )
+               {
+                       /* address command to select? - If not address
+                          but allbut then invert, that is skip, the command */
+                       if (ipc->addr1 || ipc->flags.allbut) {
+                               if (!ipc->addr1 || !selected(ipc)) {
+                                       ipc++;  /* not selected, next cmd */
+                                       continue;
+                               }
+                       }
+       doit:           /* also entered from above to resume a pending command */
+                       command(ipc);   /* execute the command pointed at */
+
+                       if (delete)     /* if delete flag is set */
+                               break;  /* don't exec rest of compiled cmds */
+
+                       if (jump)       /* if jump set, follow cmd's link */
+                       {
+                               jump = FALSE;
+                               /* a null link ends the command loop for
+                                  this line */
+                               if ((ipc = ipc->u.link) == 0)
+                               {
+                                       ipc = cmds;
+                                       break;
+                               }
+                       }
+                       else            /* normal goto next command */
+                               ipc++;
+               }
+               /* we've now done all modification commands on the line */
+
+               /* here's where the transformed line is output */
+               if (!nflag && !delete)
+               {
+                       /* spend marks the end of the text in linebuf */
+                       fwrite(linebuf, spend - linebuf, 1, stdout);
+                       if (line_with_newline)
+                               putc('\n', stdout);
+               }
+
+               /* if we've been set up for append, emit the text from it */
+               if (aptr > appends)
+                       readout();
+
+               delete = FALSE; /* clear delete flag; about to get next cmd */
+       }
+}
+
+/* is current command selected
+   ipc:        command whose address(es) are tested against the current line
+   returns:    nonzero if the command applies to the current line, with the
+               result inverted when the command's allbut flag is set
+   Keeps flags.inrange up to date for two-address commands.  A range whose
+   second address is a line number at or below the line that opens it
+   selects that single line only, as POSIX requires. */
+static int selected(sedcmd *ipc)
+{
+       register char   *p1 = ipc->addr1;       /* point p1 at first address */
+       register char   *p2 = ipc->addr2;       /*   and p2 at second */
+       int             hit = FALSE;            /* address matched this line */
+
+       if (ipc->flags.inrange)
+       {
+               /* inside an open range: the line is selected, and we only
+                  have to find out whether it is also the last one */
+               hit = TRUE;
+               if (*p2 == CEND)
+                       ;                       /* runs to the last line */
+               else if (*p2 == CLNUM)
+               {
+                       if (lnum >= linenum[(unsigned char)p2[1]])
+                               ipc->flags.inrange = FALSE;
+               }
+               else if (match(p2, 0))
+                       ipc->flags.inrange = FALSE;
+       }
+       else if (*p1 == CEND)
+       {
+               if (lastline)
+                       hit = TRUE;
+       }
+       else
+       {
+               if (*p1 == CLNUM)
+                       hit = (lnum == linenum[(unsigned char)p1[1]]);
+               else
+                       hit = match(p1, 0) ? TRUE : FALSE;
+
+               /* open the range, unless its numeric end has already been
+                  reached on this very line */
+               if (hit && p2 &&
+                   !(*p2 == CLNUM && lnum >= linenum[(unsigned char)p2[1]]))
+                       ipc->flags.inrange = TRUE;
+       }
+       return ipc->flags.allbut ? !hit : hit;
+}
+
+/* try the compiled RE at expbuf on the text in linebuf
+   expbuf:     compiled RE; a nonzero first byte marks it as anchored
+   gf:         if set, linebuf is first reloaded from genbuf and the search
+               resumes at loc2
+   returns:    nonzero on a match, FALSE otherwise; loc1 is left at the
+               position where the reported attempt started */
+static int match(char *expbuf, int gf) /* uses genbuf */
+{
+       char    *start;                 /* where the next attempt begins */
+       char    *re = expbuf + 1;       /* compiled RE past the anchor byte */
+       int     literal;                /* RE opens with a literal char */
+
+       if (!gf)
+       {
+               start = linebuf + needs_advance;
+               locs = NULL;
+       }
+       else
+       {
+               if (*expbuf)            /* anchored RE: report no match */
+                       return(FALSE);
+               strcpy(linebuf, genbuf);
+               if (needs_advance)
+                       loc2++;
+               locs = start = loc2;
+       }
+       needs_advance = 0;
+
+       if (*expbuf)                    /* anchored: only one place to try */
+       {
+               loc1 = start;
+               if (re[0] == CCHR && re[1] != *start)   /* 1st char is wrong */
+                       return(FALSE);
+               return(advance(start, re, NULL));
+       }
+
+       /* unanchored: try each position up to and including the final NUL;
+          when the RE opens with a literal, advance() is only called where
+          that character is present */
+       literal = (re[0] == CCHR);
+       for (;; start++)
+       {
+               if ((!literal || *start == re[1]) && advance(start, re, NULL))
+               {
+                       loc1 = start;
+                       return(1);
+               }
+               if (*start == '\0')     /* tried the whole line */
+                       return(FALSE);
+       }
+}
+
+/* match the compiled pattern at ep against the text at lp, handling one
+   pattern element per loop iteration and recursing for the * forms
+   lp: source (linebuf) ptr
+   ep: regular expression element ptr
+   eob: if non-NULL, receives the source ptr when a CKET is reached, and
+        the call returns TRUE right there (used by the \(...\)* case)
+   returns TRUE on reaching CEOF (loc2 is then set to the end of the
+   match) or, with eob set, on reaching CKET; FALSE when an element fails
+   to match; an unknown opcode prints an error and exits with status 2 */
+static int advance(char* lp, char* ep, char** eob)
+{
+       char    *curlp;         /* save ptr for closures */ 
+       char    c;              /* scratch character holder */
+       char    *bbeg;          /* start of the tagged text for a back ref */
+       int     ct;             /* length of the tagged text for a back ref */
+       signed int      bcount = -1;    /* tag number read at the last CKET */
+
+       for (;;)
+               switch (*ep++)
+               {
+               case CCHR:              /* literal character */
+                       if (*ep++ == *lp++)     /* if chars are equal */
+                               continue;       /* matched */
+                       return(FALSE);          /* else return false */
+
+               case CDOT:              /* anything but newline */
+                       if (*lp++)              /* first NUL is at EOL */
+                               continue;       /* keep going if didn't find */
+                       return(FALSE);          /* else return false */
+
+               case CNL:               /* start-of-line */
+               case CDOL:              /* end-of-line */
+                       if (*lp == 0)           /* found that first NUL? */
+                               continue;       /* yes, keep going */
+                       return(FALSE);          /* else return false */
+
+               case CEOF:              /* end-of-address mark */
+                       loc2 = lp;              /* set second loc */
+                       return(TRUE);           /* return true */
+
+               case CCL:               /* a character class (16-byte bitmask) */
+                       c = *lp++ & 0177;       /* only the low 7 bits index the set */
+                       if (ep[c>>3] & bits(c & 07))    /* is char in set? */
+                       {
+                               ep += 16;       /* then skip rest of bitmask */
+                               continue;       /*   and keep going */
+                       }
+                       return(FALSE);          /* else return false */
+
+               case CBRA:              /* start of tagged pattern */
+                       brastart[(unsigned char)*ep++] = lp;    /* mark it */
+                       continue;               /* and go */
+
+               case CKET:              /* end of tagged pattern */
+                       bcount = *ep;           /* remember which tag just closed */
+                       if (eob) {              /* caller asked for the group end */
+                               *eob = lp;
+                               return (TRUE);
+                       }
+                       else
+                               bracend[(unsigned char)*ep++] = lp;    /* mark it */
+                       continue;               /* and go */
+
+               case CBACK:             /* match back reference */
+                       bbeg = brastart[(unsigned char)*ep];
+                       ct = bracend[(unsigned char)*ep++] - bbeg;
+
+                       if (memcmp(bbeg, lp, ct) == 0)  /* same text again? */
+                       {
+                               lp += ct;
+                               continue;
+                       }
+                       return(FALSE);
+
+               case CBRA|STAR:         /* \(...\)* */
+               {
+                       char *lastlp;   /* where the current repetition began */
+                       curlp = lp;
+
+                       if (*ep > bcount)
+                               brastart[(unsigned char)*ep] = bracend[(unsigned char)*ep] = lp;
+
+                       /* run the group body repeatedly; each success stores
+                          the group end in lp through the eob argument */
+                       while (advance(lastlp=lp, ep+1, &lp)) {
+                               if (*ep > bcount && lp != lastlp) {
+                                       bracend[(unsigned char)*ep] = lp;    /* mark it */
+                                       brastart[(unsigned char)*ep] = lastlp;
+                               }
+                               if (lp == lastlp) break;        /* empty repetition: stop */
+                       }
+                       ep++;
+
+                       /* FIXME: scan for the brace end */
+                       while (*ep != CKET)
+                               ep++;
+                       ep+=2;          /* step over CKET and its tag number */
+
+                       needs_advance = 1;
+                       if (lp == curlp) /* 0 matches */
+                               continue;
+                       lp++;           /* compensates for the --lp done at star */ 
+                       goto star;
+               }
+               case CBACK|STAR:        /* \n*: zero or more copies of tag n;
+                                          NOTE(review): when the tag matched an
+                                          empty string ct is 0 and the loops
+                                          below never move lp - confirm this
+                                          cannot spin forever */
+                       bbeg = brastart[(unsigned char)*ep];
+                       ct = bracend[(unsigned char)*ep++] - bbeg;
+                       curlp = lp;
+                       while(memcmp(bbeg, lp, ct) == 0)        /* take every copy */
+                               lp += ct;
+
+                       while(lp >= curlp)      /* then give copies back one at a time */
+                       {
+                               if (advance(lp, ep, eob))
+                                       return(TRUE);
+                               lp -= ct;
+                       }
+                       return(FALSE);
+
+               case CDOT|STAR:         /* match .* */
+                       curlp = lp;             /* save closure start loc */
+                       while (*lp++);          /* match anything */ 
+                       goto star;              /* now look for followers */
+
+               case CCHR|STAR:         /* match <literal char>* */
+                       curlp = lp;             /* save closure start loc */
+                       while (*lp++ == *ep);   /* match many of that char */
+                       ep++;                   /* to start of next element */
+                       goto star;              /* match it and followers */
+
+               case CCL|STAR:          /* match [...]* */
+                       curlp = lp;             /* save closure start loc */
+                       do {
+                               c = *lp++ & 0x7F;       /* match any in set */
+                       } while
+                               (ep[c>>3] & bits(c & 07));
+                       ep += 16;               /* skip past the set */
+                       goto star;              /* match followers */
+
+               star:           /* the recursion part of a * or + match;
+                                  lp is one past the last char the closure
+                                  took, curlp is where the closure started */
+                       needs_advance = 1;
+                       if (--lp == curlp) {    /* 0 matches */
+                               continue;
+                       }
+#if 0          /* disabled: first-char shortcuts for CCHR and CBACK followers */
+                       if (*ep == CCHR)
+                       {
+                               c = ep[1];
+                               do {
+                                       if (*lp != c)
+                                               continue;
+                                       if (advance(lp, ep, eob))
+                                               return(TRUE);
+                               } while
+                               (lp-- > curlp);
+                               return(FALSE);
+                       }
+
+                       if (*ep == CBACK)
+                       {
+                               c = *(brastart[ep[1]]);
+                               do {
+                                       if (*lp != c)
+                                               continue;
+                                       if (advance(lp, ep, eob))
+                                               return(TRUE);
+                               } while
+                                       (lp-- > curlp);
+                               return(FALSE);
+                       }
+#endif
+                       /* match followers, try shorter match, if needed */
+                       do {
+                               if (lp == locs)
+                                       break;
+                               if (advance(lp, ep, eob))
+                                       return(TRUE);
+                       } while
+                               (lp-- > curlp);
+                       return(FALSE);
+
+               default:        /* not a known opcode: report it in octal and quit */
+                       fprintf(stderr, "sed: internal RE error, %o\n", *--ep);
+                       exit (2);
+               }
+}
+
+/* perform s command
+   ipc:        ptr to s command struct
+   returns TRUE if at least one replacement was made, FALSE otherwise */
+static int substitute(sedcmd *ipc)
+{
+       unsigned int n = 1;             /* ordinal of the match being tested */
+       int replaced = FALSE;           /* set once dosub() has actually run */
+
+       /* find the match selected by the nth flag */
+       /* the needs_advance code got a bit tricky - might need a clean
+          refactoring */
+       while (match(ipc->u.lhs, 0)) {
+               /* nth 0 is implied 1 */
+               if (!ipc->nth || n == ipc->nth) {
+                       dosub(ipc->rhs);                /* perform it once */
+                       replaced = TRUE;
+                       break;
+               }
+               needs_advance = n++;    /* skip this match, try the next one */
+       }
+
+       /* Fail unless a replacement really happened.  Testing n alone is not
+          enough: when nth exceeds the number of matches, the skipped matches
+          have already pushed n past 1 although dosub() never ran, and the
+          command would wrongly report success to t, the p flag and w. */
+       if (!replaced)
+               return(FALSE);                  /* command fails */
+
+       if (ipc->flags.global)                  /* if global flag enabled */
+               do {                            /* cycle through possibles */
+                       if (match(ipc->u.lhs, 1)) {     /* found another */
+                               dosub(ipc->rhs);        /* so substitute */
+                       }
+                       else                            /* otherwise, */
+                               break;                  /* we're done */
+               } while (*loc2);
+       return(TRUE);                           /* we succeeded */
+}
+
+/* generate substituted right-hand side (of s command)
+   rhsbuf:     the replacement text to expand; a byte with the 0200 bit
+               set stands for the whole match ('0') or for a tagged
+               match ('1' and up), any other byte is copied literally
+   the new line is assembled in genbuf and then copied back over linebuf;
+   loc2 and spend are updated to point into the rewritten linebuf */
+static void dosub(char *rhsbuf)                /* uses linebuf, genbuf, spend */
+{
+       char    *lp, *sp, *rp;
+       int     c;
+
+       /* copy linebuf to genbuf up to location 1 */
+       lp = linebuf; sp = genbuf;
+       while (lp < loc1) *sp++ = *lp++;
+
+       for (rp = rhsbuf; (c = *rp++); )        /* walk the replacement text */
+       {
+               if (c & 0200 && (c & 0177) == '0')      /* whole-match marker */
+               {
+                       sp = place(sp, loc1, loc2);
+                       continue;
+               }
+               else if (c & 0200 && (c &= 0177) >= '1' && c < MAXTAGS+'1')     /* tag marker */
+               {
+                       sp = place(sp, brastart[c-'1'], bracend[c-'1']);
+                       continue;
+               }
+               *sp++ = c & 0177;               /* ordinary char, high bit dropped */
+               if (sp >= genbuf + MAXBUF)      /* NOTE(review): only reports; copying goes on */
+                       fprintf(stderr, LTLMSG);
+
+       }
+       lp = loc2;                              /* rest of the old line, after the match */
+       loc2 = sp - genbuf + linebuf;           /* end of the replacement, as a linebuf ptr */
+       while ((*sp++ = *lp++))                 /* append the tail, NUL included */
+               if (sp >= genbuf + MAXBUF)      /* NOTE(review): only reports here as well */
+                       fprintf(stderr, LTLMSG);
+       lp = linebuf; sp = genbuf;
+       while ((*lp++ = *sp++));                /* copy the finished line back to linebuf */
+       spend = lp-1;                           /* spend points at the terminating NUL */
+}
+
+/* copy the chars at *al1...*(al2 - 1) to asp... in genbuf[]
+   asp: where in genbuf to start writing
+   al1: first char to copy
+   al2: one past the last char to copy
+   returns the write position after the last char copied; LTLMSG is
+   printed each time that position is at or beyond genbuf + MAXBUF */
+static char *place(char* asp, char* al1, char* al2)            /* uses genbuf */
+{
+       char    *src;
+
+       for (src = al1; src < al2; src++)
+       {
+               *asp++ = *src;
+               if (asp >= genbuf + MAXBUF)
+                       fprintf(stderr, LTLMSG);
+       }
+       return(asp);
+}
+
+/* list the pattern space in visually unambiguous form *p1... to fp
+   p1: the source; listing runs from p1 up to (not including) spend
+   fp: output stream to write to
+   printable chars are copied as they are; anything else is written as a
+   backslash followed by a mnemonic letter (b t n r e) or two hex digits;
+   a newline is written at the end */
+static void listto(char *p1, FILE *fp)
+{
+       for (; p1<spend; p1++)
+       {
+               /* go through unsigned char: a negative char is not a valid
+                  argument for isprint(), and it would be sign-extended to
+                  eight hex digits by %02x */
+               unsigned char   uc = (unsigned char)*p1;
+
+               if (isprint(uc))
+               {
+                       putc(uc, fp);           /* pass it through */
+                       continue;
+               }
+
+               putc('\\', fp);                 /* emit a backslash */
+               switch(uc)
+               {
+               case '\b':      putc('b', fp); break;   /* BS */
+               case '\t':      putc('t', fp); break;   /* TAB */
+               case '\n':      putc('n', fp); break;   /* NL */
+               case '\r':      putc('r', fp); break;   /* CR */
+               case '\033':    putc('e', fp); break;   /* ESC */
+               default:        fprintf(fp, "%02x", (unsigned int)uc);
+               }
+       }
+       putc('\n', fp);
+}
+
+/* write a hex dump expansion of *p1... to fp
+   p1: source; the dump runs from p1 up to (not including) spend
+   fp: output
+   every byte is written as two hex digits, followed by the hex code of
+   a newline and then a real newline */
+static void dumpto(char *p1, FILE *fp)
+{
+       /* convert through unsigned char so that bytes >= 0x80 are not
+          sign-extended into eight hex digits */
+       for (; p1<spend; p1++)
+               fprintf(fp, "%02x", (unsigned int)(unsigned char)*p1);
+       fprintf(fp, "%02x", (unsigned int)'\n');
+       putc('\n', fp);
+}
+
+/* warn on stderr that a buffer was cut to MAXBUF characters
+   h: nonzero to name the hold space, zero to name the current line number
+   nothing is printed when lnum still equals the value seen by the
+   previous warning */
+static void truncated(int h)
+{
+       static long last = 0L;          /* lnum at the time of the last warning */
+
+       if (lnum != last)
+       {
+               last = lnum;
+
+               fprintf(stderr, "sed: ");
+               if (h)
+                       fprintf(stderr, "hold space");
+               else
+                       fprintf(stderr, "line %ld", lnum);
+               fprintf(stderr, " truncated to %d characters\n", MAXBUF);
+       }
+}
+
+/* execute compiled command pointed at by ipc
+   ipc: the command to run; ipc->command selects the case below
+   nothing is returned: results are left in file-level state (linebuf and
+   spend, delete, jump, pending, the append queue) and in the static
+   hold space kept inside this function */
+static void command(sedcmd *ipc)
+{
+       static int      didsub;                 /* true if last s succeeded */
+       static char     holdsp[MAXHOLD];        /* the hold space */
+       static char     *hspend = holdsp;       /* hold space end pointer */
+       register char   *p1, *p2;
+       char            *execp;                 /* result of getline() */
+
+       needs_advance = 0;
+       switch(ipc->command)
+       {
+       case ACMD:              /* append */
+               *aptr++ = ipc;                  /* queue it; readout() prints it */
+               if (aptr >= appends + MAXAPPENDS)       /* NOTE(review): checked after the store, and only warns */
+                       fprintf(stderr,
+                               "sed: too many appends after line %ld\n",
+                               lnum);
+               *aptr = 0;                      /* keep the queue 0-terminated */
+               break;
+
+       case CCMD:              /* change pattern space */
+               delete = TRUE;
+               if (!ipc->flags.inrange || lastline)
+                       printf("%s\n", ipc->u.lhs);             
+               break;
+
+       case DCMD:              /* delete pattern space */
+               delete++;
+               break;
+
+       case CDCMD:             /* delete the first line of the pattern space */
+               p1 = p2 = linebuf;
+               while(*p1 != '\n')              /* look for the first newline */
+                       if ((delete = (*p1++ == 0)))    /* none: set delete and stop */
+                               return;
+               p1++;
+               while((*p2++ = *p1++)) continue;        /* move the rest to the front */
+               spend = p2-1;
+               jump++;
+               break;
+
+       case EQCMD:             /* show current line number */
+               fprintf(stdout, "%ld\n", lnum);
+               break;
+
+       case GCMD:              /* copy hold space to pattern space */
+               p1 = linebuf;   p2 = holdsp;    while((*p1++ = *p2++));
+               spend = p1-1;
+               break;
+
+       case CGCMD:             /* append hold space to pattern space */
+               *spend++ = '\n';
+               p1 = spend;     p2 = holdsp;
+               do {
+                       if (p1 > linebuf + MAXBUF) {    /* out of room: cut here */
+                               truncated(FALSE);
+                               p1[-1] = 0;
+                               break;
+                       }
+               } while((*p1++ = *p2++));
+
+               spend = p1-1;
+               break;
+
+       case HCMD:              /* copy pattern space to hold space */
+               p1 = holdsp;    p2 = linebuf;   while((*p1++ = *p2++));
+               hspend = p1-1;
+               break;
+
+       case CHCMD:             /* append pattern space to hold space */
+               *hspend++ = '\n';
+               p1 = hspend;    p2 = linebuf;
+               do {
+                       if (p1 > holdsp + MAXBUF) {     /* out of room: cut here */
+                               truncated(TRUE);
+                               p1[-1] = 0;
+                               break;
+                       }
+               } while((*p1++ = *p2++));
+
+               hspend = p1-1;
+               break;
+
+       case ICMD:              /* insert text */
+               printf("%s\n", ipc->u.lhs);
+               break;
+
+       case BCMD:              /* branch to label */
+               jump = TRUE;
+               break;
+
+       case LCMD:              /* list text */
+               listto(linebuf, (ipc->fout != NULL)?ipc->fout:stdout); break;
+
+       case CLCMD:             /* dump text */
+               dumpto(linebuf, (ipc->fout != NULL)?ipc->fout:stdout); break;
+
+       case NCMD:              /* read next line into pattern space */
+               if (!nflag)
+                       puts(linebuf);  /* flush out the current line */
+               if (aptr > appends)
+                       readout();      /* do pending a, r commands */
+               if ((execp = getline(linebuf, MAXBUF+1)) == BAD)        /* no line could be read */
+               {
+                       pending = ipc;
+                       delete = TRUE;
+                       break;
+               }
+               spend = execp;
+               break;
+
+       case CNCMD:             /* append next line to pattern space */
+               if (aptr > appends)
+                       readout();
+               *spend++ = '\n';
+               if ((execp = getline(spend,
+                                    linebuf + MAXBUF+1 - spend)) == BAD)       /* no line could be read */
+               {
+                       pending = ipc;
+                       delete = TRUE;
+                       break;
+               }
+               spend = execp;
+               break;
+
+       case PCMD:              /* print pattern space */
+               puts(linebuf);
+               break;
+
+       case CPCMD:             /* print one line from pattern space */
+               cpcom:          /* so s command can jump here */
+               for(p1 = linebuf; *p1 != '\n' && *p1 != '\0'; )
+                       putc(*p1++, stdout);
+               putc('\n', stdout);
+               break;
+
+       case QCMD:              /* quit the stream editor */
+               if (!nflag)
+                       puts(linebuf);  /* flush out the current line */
+               if (aptr > appends)
+                       readout();      /* do any pending a and r commands */
+               exit(0);
+
+       case RCMD:              /* read a file into the stream */
+               *aptr++ = ipc;                  /* queue it; readout() copies the file */
+               if (aptr >= appends + MAXAPPENDS)       /* NOTE(review): checked after the store, and only warns */
+                       fprintf(stderr,
+                               "sed: too many reads after line %ld\n",
+                               lnum);
+               *aptr = 0;                      /* keep the queue 0-terminated */
+               break;
+
+       case SCMD:              /* substitute RE */
+               didsub = substitute(ipc);
+               if (ipc->flags.print && didsub)
+               {
+                       if (ipc->flags.print == TRUE)
+                               puts(linebuf);
+                       else            /* any other nonzero value: first line only */
+                               goto cpcom;
+               }
+               if (didsub && ipc->fout)
+                       fprintf(ipc->fout, "%s\n", linebuf);
+               break;
+
+       case TCMD:              /* branch on last s successful */
+       case CTCMD:             /* branch on last s failed */
+               if (didsub == (ipc->command == CTCMD))
+                       break;          /* no branch if last s failed, else */
+               didsub = FALSE;
+               jump = TRUE;            /*  set up to jump to assoc'd label */
+               break;
+
+       case CWCMD:             /* write one line from pattern space */
+               for(p1 = linebuf; *p1 != '\n' && *p1 != '\0'; )
+                       putc(*p1++, ipc->fout);
+               putc('\n', ipc->fout);
+               break;
+
+       case WCMD:              /* write pattern space to file */
+               fprintf(ipc->fout, "%s\n", linebuf);
+               break;
+
+       case XCMD:              /* exchange pattern and hold spaces, via genbuf */
+               p1 = linebuf;   p2 = genbuf;    while((*p2++ = *p1++)) continue;
+               p1 = holdsp;    p2 = linebuf;   while((*p2++ = *p1++)) continue;
+               spend = p2 - 1;
+               p1 = genbuf;    p2 = holdsp;    while((*p2++ = *p1++)) continue;
+               hspend = p2 - 1;
+               break;
+
+       case YCMD:              /* map each char through the table at u.lhs */
+               p1 = linebuf;   p2 = ipc->u.lhs;
+               while((*p1 = p2[(unsigned char)*p1]))   /* stops when a char maps to NUL */
+                       p1++;
+               break;
+       }
+}
+
+/* get next line of text to be filtered
+   buf: where to send the input
+   max: size limit handed to fgets()
+   returns a ptr to the terminating NUL of the stored line, or BAD when
+   fgets() delivers nothing; also counts the line in lnum, records in
+   line_with_newline whether it ended in '\n', and may set lastline */
+static char *getline(char *buf, int max)
+{
+       int     peek;
+
+       if (fgets(buf, max, stdin) == NULL)
+               return(BAD);
+
+       lnum++;                 /* one more line has been read */
+
+       /* walk to the end of the input and cut off a trailing '\n' */
+       for (; *buf != '\n' && *buf != 0; buf++)
+               continue;
+       line_with_newline = *buf == '\n';
+       *buf = 0;
+
+       /* last-line detection, done only if some command uses that address:
+          peek one char ahead and push it back unless it is EOF */
+       if (last_line_used)
+       {
+               peek = fgetc(stdin);
+               if (peek == EOF)
+               {
+                       if (eargc == 0)         /* no more args either */
+                               lastline = TRUE;
+               }
+               else
+                       ungetc (peek, stdin);
+       }
+
+       return(buf);            /* ptr to the terminating null */
+}
+
+/* write out everything queued by the a and r commands, then empty the
+   queue: the text of each "a" is printed followed by a newline, the file
+   named by each "r" is copied to stdout (a file that cannot be opened is
+   skipped silently) */
+static void readout(void)
+{
+       FILE    *src;           /* file named by an r command */
+       int     ch;             /* char read from src, or EOF */
+
+       for (aptr = appends; *aptr; aptr++)
+       {
+               if ((*aptr)->command == ACMD)           /* "a": emit its text */
+               {
+                       printf("%s\n", (*aptr)->u.lhs);
+                       continue;
+               }
+
+               /* "r": copy the named file char by char */
+               src = fopen((*aptr)->u.lhs, "r");
+               if (src == NULL)
+                       continue;
+               while ((ch = getc(src)) != EOF)
+                       putc((char) ch, stdout);
+               fclose(src);
+       }
+
+       aptr = appends;         /* queue is empty again */
+       *aptr = 0;
+}
+
+/* sedexec.c ends here */