Zhao Yanbai Git Server - minix.git/commitdiff
gas2ack
author Tomas Hruby <tom@minix3.org>
Fri, 30 Oct 2009 15:57:35 +0000 (15:57 +0000)
committer Tomas Hruby <tom@minix3.org>
Fri, 30 Oct 2009 15:57:35 +0000 (15:57 +0000)
    - an asmconv based tool for conversion from GNU ia32 assembly to ACK assembly

    - in contrast to asmconv it is a one way tool only

    - as the GNU assembly in Minix does not prefix global C symbols with _,
      gas2ack detects such symbols and prefixes them to be compliant with the
      ACK convention

    - gas2ack preserves comments and unexpanded macros

    - bunch of fixes to the asmconv GNU->ACK direction

    - support for more instructions that ACK does not know but that are in use
      in Minix

    - it is meant as a temporary solution for as long as ACK remains a
      supported compiler for the core system

13 files changed:
commands/i386/Makefile
commands/i386/gas2ack/Makefile [new file with mode: 0644]
commands/i386/gas2ack/asm86.c [new file with mode: 0644]
commands/i386/gas2ack/asm86.h [new file with mode: 0644]
commands/i386/gas2ack/asmconv.h [new file with mode: 0644]
commands/i386/gas2ack/emit_ack.c [new file with mode: 0644]
commands/i386/gas2ack/gas2ack.c [new file with mode: 0644]
commands/i386/gas2ack/globals.c [new file with mode: 0644]
commands/i386/gas2ack/globals.h [new file with mode: 0644]
commands/i386/gas2ack/languages.h [new file with mode: 0644]
commands/i386/gas2ack/parse_gnu.c [new file with mode: 0644]
commands/i386/gas2ack/token.h [new file with mode: 0644]
commands/i386/gas2ack/tokenize.c [new file with mode: 0644]

index b052d08d561e6dc876f84fcf5b8b7a4d51e3caca..a391d88691c06f7fa2f67c1dad5b9861be18e273 100755 (executable)
@@ -28,3 +28,4 @@ clean::
 
 all install clean::
        cd asmconv && $(MAKE) $@
+       cd gas2ack && $(MAKE) $@
diff --git a/commands/i386/gas2ack/Makefile b/commands/i386/gas2ack/Makefile
new file mode 100644 (file)
index 0000000..3c25c6a
--- /dev/null
@@ -0,0 +1,29 @@
+# Makefile for gas2ack.
+#
+# Targets: "all" builds the gas2ack binary, "install" copies it to
+# /usr/bin/gas2ack, "clean" removes the objects and the binary.
+
+CFLAGS=                $(OPT)
+LDFLAGS=       -i
+CC = exec cc
+
+all:   gas2ack
+
+OBJ=   asm86.o gas2ack.o globals.o parse_gnu.o \
+       tokenize.o emit_ack.o
+
+gas2ack:       $(OBJ)
+       $(CC) $(LDFLAGS) -o $@ $(OBJ)
+
+install:       /usr/bin/gas2ack
+
+/usr/bin/gas2ack:      gas2ack
+       install -cs -o bin gas2ack $@
+
+clean:
+       rm -f $(OBJ) gas2ack core
+
+# Dependencies.
+# emit_ack.c includes globals.h, so emit_ack.o must be rebuilt when that
+# header changes.
+# NOTE(review): the other .c files are not checked here; verify whether
+# globals.o (and any other object) should also list globals.h.
+asm86.o:       asm86.h asmconv.h token.h
+gas2ack.o:     asmconv.h languages.h asm86.h
+globals.o:     asm86.h
+parse_gnu.o:   asmconv.h languages.h token.h asm86.h
+tokenize.o:    asmconv.h token.h
+emit_ack.o:    asmconv.h languages.h token.h asm86.h globals.h
diff --git a/commands/i386/gas2ack/asm86.c b/commands/i386/gas2ack/asm86.c
new file mode 100644 (file)
index 0000000..29c5b7b
--- /dev/null
@@ -0,0 +1,87 @@
+/*     asm86.c - 80X86 assembly intermediate           Author: Kees J. Bot
+ *                                                             24 Dec 1993
+ */
+#define nil 0
+#include <stddef.h>
+#include <string.h>
+#include <assert.h>
+#include "asm86.h"
+#include "asmconv.h"
+#include "token.h"
+
+expression_t *new_expr(void)
+/* Make a new cell to build an expression.  The cell starts out with no
+ * children and no name, an operator of -1 (which is none of the character
+ * codes documented in asm86.h) and a length of 0.  Every field is set
+ * explicitly, so nothing depends on what allocate() leaves in the memory.
+ * The magic number is what del_expr() asserts on before freeing a cell.
+ */
+{
+       expression_t *e;
+
+       e= allocate(nil, sizeof(*e));
+       e->operator= -1;
+       e->left= e->middle= e->right= nil;
+       e->name= nil;
+       e->len= 0;
+       e->magic= 31624;
+       return e;
+}
+
+void del_expr(expression_t *e)
+/* Free an expression tree: the name of the cell, then the left, middle and
+ * right subtrees, then the cell itself.  A nil tree is accepted and ignored.
+ */
+{
+       expression_t *child[3];
+       int i;
+
+       if (e == nil) return;
+
+       assert(e->magic == 31624);
+       e->magic= 0;            /* Catch a second delete of the same cell. */
+
+       child[0]= e->left;
+       child[1]= e->middle;
+       child[2]= e->right;
+
+       deallocate(e->name);
+       for (i= 0; i < 3; i++) del_expr(child[i]);
+       deallocate(e);
+}
+
+asm86_t *new_asm86(void)
+/* Allocate an instruction cell in its empty state: opcode -1, operand type
+ * NONE, no size prefix bits, no repeat prefix, default segment, no operands
+ * and no raw text.  The file name and line number are filled in by
+ * get_file().
+ */
+{
+       asm86_t *cell= allocate(nil, sizeof(*cell));
+
+       cell->magic= 37937;
+       cell->opcode= -1;
+       cell->optype= NONE;
+       cell->oaz= 0;
+       cell->rep= ONCE;
+       cell->seg= DEFSEG;
+       cell->args= nil;
+       cell->raw_string = NULL;
+       get_file(&cell->file, &cell->line);
+       return cell;
+}
+
+void del_asm86(asm86_t *a)
+/* Free an instruction cell together with its operand tree and its raw text.
+ * Unlike del_expr() this function asserts that the argument is not nil.
+ */
+{
+       expression_t *operands;
+       char *text;
+
+       assert(a != nil);
+       assert(a->magic == 37937);
+
+       operands= a->args;
+       text= a->raw_string;
+
+       a->magic= 0;
+       a->raw_string = NULL;
+
+       del_expr(operands);
+       deallocate(text);
+       deallocate(a);
+}
+
+int isregister(const char *name)
+/* True if the string is a register name.  The value returned is the
+ * position of the name in the table below, counting from 1, or 0 if the
+ * name is not in the table.  The IS_REG8, IS_REG16, IS_REG32, IS_REGSEG and
+ * IS_REGCR macros in asm86.h test this number against fixed ranges, so the
+ * order and the size of the groups in the table must not change.
+ */
+{
+       static const char *const regs[] = {
+               "al", "bl", "cl", "dl", "ah", "bh", "ch", "dh",
+               "ax", "bx", "cx", "dx", "si", "di", "bp", "sp",
+               "eax", "ebx", "ecx", "edx", "esi", "edi", "ebp", "esp",
+               "cs", "ds", "es", "fs", "gs", "ss",
+               "cr0", "cr1", "cr2", "cr3", "cr4",
+               "st",
+       };
+       size_t reg;
+
+       for (reg= 0; reg < arraysize(regs); reg++) {
+               if (strcmp(name, regs[reg]) == 0)
+                       return (int) (reg + 1);
+       }
+       return 0;
+}
diff --git a/commands/i386/gas2ack/asm86.h b/commands/i386/gas2ack/asm86.h
new file mode 100644 (file)
index 0000000..aae77bd
--- /dev/null
@@ -0,0 +1,272 @@
+/*     asm86.h - 80X86 assembly intermediate           Author: Kees J. Bot
+ *                                                             27 Jun 1993
+ */
+
+/* The order of the constants matters to the macros that follow this enum:
+ *  - all pseudo instructions (DOT_ALIGN .. DOT_USE32) must come first,
+ *    because is_pseudo() is a plain "<= DOT_USE32" test;
+ *  - XOR must stay the last real opcode, because N_OPCODES is XOR + 1.
+ *    COMMENT, C_PREPROCESSOR and UNKNOWN therefore lie outside the range
+ *    0 .. N_OPCODES-1.
+ */
+typedef enum opcode {  /* 80486 opcodes, from the i486 reference manual.
+                        * Synonyms left out, some new words invented.
+                        */
+       DOT_ALIGN,
+       DOT_ASCII,      DOT_ASCIZ,
+       DOT_ASSERT,                     /* Pseudo's invented */
+       DOT_BASE,
+       DOT_COMM,       DOT_LCOMM,
+       DOT_DATA1,
+       DOT_DATA2,
+       DOT_DATA4,
+       DOT_DEFINE,     DOT_EXTERN,
+       DOT_EQU,
+       DOT_FILE,       DOT_LINE,
+       DOT_LABEL,
+       DOT_LIST,       DOT_NOLIST,
+       DOT_SPACE,
+       DOT_SYMB,
+       DOT_TEXT,       DOT_ROM,        DOT_DATA,       DOT_BSS,        DOT_END,
+       DOT_USE16,      DOT_USE32,
+       AAA,
+       AAD,
+       AAM,
+       AAS,
+       ADC,
+       ADD,
+       AND,
+       ARPL,
+       BOUND,
+       BSF,
+       BSR,
+       BSWAP,
+       BT,
+       BTC,
+       BTR,
+       BTS,
+       CALL,   CALLF,                  /* CALLF added */
+       CBW,
+       CLC,
+       CLD,
+       CLI,
+       CLTS,
+       CMC,
+       CMP,
+       CMPS,
+       CMPXCHG,
+       CPUID,
+       CWD,
+       DAA,
+       DAS,
+       DEC,
+       DIV,
+       ENTER,
+       F2XM1,
+       FABS,
+       FADD,   FADDD,  FADDS,  FADDP,  FIADDL, FIADDS,
+       FBLD,
+       FBSTP,
+       FCHS,
+       FCLEX,
+       FCOMD,  FCOMS,  FCOMPD, FCOMPS, FCOMPP,
+       FCOS,
+       FDECSTP,
+       FDIVD,  FDIVS,  FDIVP,  FIDIVL, FIDIVS,
+       FDIVRD, FDIVRS, FDIVRP, FIDIVRL,        FIDIVRS,
+       FFREE,
+       FICOM,  FICOMP,
+       FILDQ,  FILDL,  FILDS,
+       FINCSTP,
+       FINIT,
+       FISTL,  FISTS,  FISTP,
+       FLDX,   FLDD,   FLDS,
+       FLD1,   FLDL2T, FLDL2E, FLDPI,  FLDLG2, FLDLN2, FLDZ,
+       FLDCW,
+       FLDENV,
+       FMULD,  FMULS,  FMULP,  FIMULL, FIMULS,
+       FNOP,
+       FPATAN,
+       FPREM,
+       FPREM1,
+       FPTAN,
+       FRNDINT,
+       FRSTOR,
+       FSAVE,
+       FSCALE,
+       FSIN,
+       FSINCOS,
+       FSQRT,
+       FSTD,   FSTS,   FSTPX,  FSTPD,  FSTPS,
+       FSTCW,
+       FSTENV,
+       FSTSW,
+       FSUBD,  FSUBS,  FSUBP,  FISUBL, FISUBS,
+       FSUBRD, FSUBRS, FSUBPR, FISUBRL, FISUBRS,
+       FTST,
+       FUCOM,  FUCOMP, FUCOMPP,
+       FXAM,
+       FXCH,
+       FXTRACT,
+       FYL2X,
+       FYL2XP1,
+       HLT,
+       IDIV,
+       IMUL,
+       IN,
+       INC,
+       INS,
+       INT,    INTO,
+       INVD,
+       INVLPG,
+       IRET,   IRETD,
+       JA,     JAE,    JB,     JBE,    JCXZ,   JE,     JG,     JGE,    JL,
+       JLE,    JNE,    JNO,    JNP,    JNS,    JO,     JP,     JS,
+       JMP,    JMPF,                   /* JMPF added */
+       LAHF,
+       LAR,
+       LEA,
+       LEAVE,
+       LGDT,   LIDT,
+       LGS,    LSS,    LDS,    LES,    LFS,
+       LLDT,
+       LMSW,
+       LOCK,
+       LODS,
+       LOOP,   LOOPE,  LOOPNE,
+       LSL,
+       LTR,
+       MOV,
+       MOVS,
+       MOVSX,
+       MOVSXB,
+       MOVZX,
+       MOVZXB,
+       MUL,
+       NEG,
+       NOP,
+       NOT,
+       OR,
+       OUT,
+       OUTS,
+       POP,
+       POPA,
+       POPAD,
+       POPF,
+       PUSH,
+       PUSHA,
+       PUSHAD,
+       PUSHF,
+       RCL,    RCR,    ROL,    ROR,
+       RET,    RETF,                   /* RETF added */
+       SAHF,
+       SAL,    SAR,    SHL,    SHR,
+       SBB,
+       SCAS,
+       SETA,   SETAE,  SETB,   SETBE,  SETE,   SETG,   SETGE,  SETL,
+       SETLE,  SETNE,  SETNO,  SETNP,  SETNS,  SETO,   SETP,   SETS,
+       SGDT,   SIDT,
+       SHLD,
+       SHRD,
+       SLDT,
+       SMSW,
+       STC,
+       STD,
+       STI,
+       STOS,
+       STR,
+       SUB,
+       TEST,
+       VERR,   VERW,
+       WAIT,
+       WBINVD,
+       XADD,
+       XCHG,
+       XLAT,
+       XOR,
+       /* The three values below are not counted by N_OPCODES. */
+       COMMENT,
+       C_PREPROCESSOR,
+       UNKNOWN
+} opcode_t;
+
+/* True for the pseudo instructions, i.e. the DOT_* values that open the
+ * opcode enum up to and including DOT_USE32.
+ */
+#define is_pseudo(o)   ((o) <= DOT_USE32)
+/* Number of opcode values from DOT_ALIGN up to and including XOR. */
+#define N_OPCODES      ((int) XOR + 1)
+
+/* Bit flags; the "oaz" field of asm86_t is described as holding the
+ * operand/address size prefix.
+ */
+#define OPZ    0x01            /* Operand size prefix. */
+#define ADZ    0x02            /* Address size prefix. */
+
+/* Type of the operands of an instruction (the "optype" field of asm86_t).
+ * new_asm86() starts every instruction at NONE.
+ * NOTE(review): the list is marked as ordered; presumably users compare
+ * these values with relational operators -- confirm before reordering.
+ */
+typedef enum optype {
+       NONE, PSEUDO,   JUMP,   JUMP16, BYTE,   WORD,   OWORD   /* Ordered list! */
+} optype_t;
+
+/* Repeat prefix of an instruction (the "rep" field of asm86_t).
+ * new_asm86() starts every instruction at ONCE.
+ */
+typedef enum repeat {
+       ONCE,   REP,    REPE,   REPNE
+} repeat_t;
+
+/* Segment override of an instruction (the "seg" field of asm86_t);
+ * new_asm86() starts every instruction at DEFSEG.  The order CSEG .. SSEG
+ * must match the order "cs", "ds", "es", "fs", "gs", "ss" of the register
+ * table in isregister(), because segreg2seg() converts a register number
+ * to a segment_t by plain arithmetic.
+ */
+typedef enum segment {
+       DEFSEG, CSEG,   DSEG,   ESEG,   FSEG,   GSEG,   SSEG
+} segment_t;
+
+/* One cell of an operand expression tree.
+ *   operator  - kind of cell: one of the codes in the table at the end of
+ *               this file ('[', 'W', 'S', ',', ...); new_expr() sets it
+ *               to -1
+ *   left, middle, right - sub-expressions; which of them are used depends
+ *               on the operator, the others are nil
+ *   name      - text of the cell (word, string, label, base or index name)
+ *               or nil; freed by del_expr()
+ *   len       - length of a string cell ('S'), see the table below
+ *   magic     - set to 31624 by new_expr(), checked and cleared by
+ *               del_expr()
+ */
+typedef struct expression {
+       int             operator;
+       struct expression *left, *middle, *right;
+       char            *name;
+       size_t          len;
+       unsigned        magic;
+} expression_t;
+
+/* One 80X86 instruction (or pseudo instruction) in intermediate form.
+ * Cells are made by new_asm86() and freed, together with args and
+ * raw_string, by del_asm86().
+ */
+typedef struct asm86 {
+       opcode_t        opcode;         /* DOT_TEXT, MOV, ... */
+       char            *file;          /* Name of the file it is found in. */
+       long            line;           /* Line number. */
+       optype_t        optype;         /* Type of operands: byte, word... */
+       int             oaz;            /* Operand/address size prefix? */
+       repeat_t        rep;            /* Repeat prefix used on this instr. */
+       segment_t       seg;            /* Segment override. */
+       expression_t    *args;          /* Arguments in ACK order. */
+       unsigned        magic;          /* 37937 while the cell is alive. */
+       char *          raw_string;     /* Text carried along with the cell:
+                                          the comment that goes with an
+                                          instruction, a comment that is the
+                                          only thing on its line, or a
+                                          preprocessor macro, which may span
+                                          multiple lines.  In the last two
+                                          cases the cell contains no
+                                          instruction.
+                                        */
+} asm86_t;
+
+expression_t *new_expr(void);
+void del_expr(expression_t *a);
+asm86_t *new_asm86(void);
+void del_asm86(asm86_t *a);
+
+int isregister(const char *name);
+
+/* Classify the number returned by isregister().  The ranges follow the
+ * layout of the register name table in asm86.c: 8 byte registers, 8 word
+ * registers, 8 dword registers, 6 segment registers, 5 control registers.
+ */
+#define IS_REG8(n)     ((n) >= 1 && (n) <=8)
+#define IS_REG16(n)    ((n) >= 9 && (n) <=16)
+#define IS_REG32(n)    ((n) >= 17 && (n) <=24)
+#define IS_REGSEG(n)   ((n) >= 25 && (n) <=30)
+#define IS_REGCR(n)    ((n) >= 31 && (n) <=35)
+/* Convert a segment register number (25 .. 30) to the segment_t at the same
+ * position (CSEG .. SSEG).  The argument is parenthesized so that an
+ * arbitrary expression can be passed without precedence surprises.
+ */
+#define segreg2seg(reg)        ((segment_t)((reg) - 25 + 1))
+
+/*
+ * Format of the arguments of the asm86_t structure:
+ *
+ *
+ * ACK assembly operands       expression_t cell:
+ * or part of operand:         {operator, left, middle, right, name, len}
+ *
+ * [expr]                      {'[', nil, expr, nil}
+ * word                                {'W', nil, nil, nil, word}
+ * "string"                    {'S', nil, nil, nil, "string", strlen("string")}
+ * label = expr                        {'=', nil, expr, nil, label}
+ * expr * expr                 {'*', expr, nil, expr}
+ * - expr                      {'-', nil, expr, nil}
+ * (memory)                    {'(', nil, memory, nil}
+ * offset(base)(index*n)       {'O', offset, base, index*n}
+ * base                                {'B', nil, nil, nil, base}
+ * index*4                     {'4', nil, nil, nil, index}
+ * operand, oplist             {',', operand, nil, oplist}
+ * label :                     {':', nil, nil, nil, label}
+ *
+ * The precedence of operators is ignored.  The expression is simply copied
+ * as is, including parentheses.  Problems like missing operators in the
+ * target language will have to be handled by rewriting the source language.
+ * 16-bit or 32-bit registers must be used where they are required by the
+ * target assembler even though ACK makes no difference between 'ax' and
+ * 'eax'.  Asmconv is smart enough to transform compiler output.  Human made
+ * assembly can be fixed up to be transformable.
+ */
diff --git a/commands/i386/gas2ack/asmconv.h b/commands/i386/gas2ack/asmconv.h
new file mode 100644 (file)
index 0000000..4ef2eaa
--- /dev/null
@@ -0,0 +1,24 @@
+/*     asmconv.h - shared functions                    Author: Kees J. Bot
+ *                                                             19 Dec 1993
+ */
+
+/* arraysize(a):  number of elements of the array a (a real array, not a
+ *               pointer).
+ * arraylimit(a): pointer just past the last element of the array a.
+ * between(a, c, z): true if a <= c <= z, done as one unsigned comparison;
+ *               this relies on a <= z.
+ */
+#define arraysize(a)   (sizeof(a)/sizeof((a)[0]))
+#define arraylimit(a)  ((a) + arraysize(a))
+#define between(a, c, z)       \
+                       ((unsigned)((c) - (a)) <= (unsigned)((z) - (a)))
+
+/* Shared helpers, defined outside this header.
+ * NOTE(review): size_t is used here but this header includes nothing, so
+ * a file that includes it must first include a header that defines size_t
+ * (asm86.c includes <stddef.h>, emit_ack.c includes <stdio.h>).
+ * NOTE(review): allocate() is called as allocate(nil, size) to get a new
+ * object; presumably a non-nil first argument resizes an existing one --
+ * confirm against the definition.
+ */
+void *allocate(void *mem, size_t size);
+void deallocate(void *mem);
+void fatal(char *label);
+char *copystr(const char *s);
+int isanumber(const char *s);
+
+extern int asm_mode32; /* In 32 bit mode if true. */
+
+/* Query and set the current mode; both set macros are void expressions. */
+#define use16()                (!asm_mode32)
+#define use32()                ((int) asm_mode32)
+#define set_use16()    ((void) (asm_mode32= 0))
+#define set_use32()    ((void) (asm_mode32= 1))
+
+extern int err_code;   /* Exit code. */
+/* Record a failure in err_code.  EXIT_FAILURE comes from <stdlib.h>, which
+ * the file using this macro has to include itself.
+ */
+#define set_error()    ((void) (err_code= EXIT_FAILURE))
diff --git a/commands/i386/gas2ack/emit_ack.c b/commands/i386/gas2ack/emit_ack.c
new file mode 100644 (file)
index 0000000..5a956e9
--- /dev/null
@@ -0,0 +1,696 @@
+/*     emit_ack.c - emit ACK assembly                  Author: Kees J. Bot
+ *                  emit NCC assembly                          27 Dec 1993
+ */
+#define nil 0
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdarg.h>
+#include <string.h>
+#include <assert.h>
+#include "asmconv.h"
+#include "token.h"
+#include "asm86.h"
+#include "languages.h"
+#include "globals.h"
+
+/* One row of the translation table: an opcode and the text to emit for it. */
+typedef struct mnemonic {      /* ACK as86 mnemonics translation table. */
+       opcode_t        opcode;
+       char            *name;
+} mnemonic_t;
+
+/* Opcode to ACK mnemonic table.  ack_emit_init() copies it into
+ * opcode2name_tab[], indexed by opcode, and asserts that no opcode occurs
+ * twice; the order of the rows does not matter for that.  Every opcode used
+ * here must be below N_OPCODES, the size of opcode2name_tab[].
+ * Opcodes without a row (for instance CPUID, DOT_EQU and DOT_LABEL) are not
+ * given a name by ack_emit_init(), so their opcode2name_tab[] slot stays nil
+ * unless other code fills it.
+ * NOTE(review): some names end in '%'; presumably the emitter replaces that
+ * marker depending on the operand size -- confirm in ack_emit_instruction().
+ */
+static mnemonic_t mnemtab[] = {
+       { AAA,          "aaa"           },
+       { AAD,          "aad"           },
+       { AAM,          "aam"           },
+       { AAS,          "aas"           },
+       { ADC,          "adc%"          },
+       { ADD,          "add%"          },
+       { AND,          "and%"          },
+       { ARPL,         "arpl"          },
+       { BOUND,        "bound"         },
+       { BSF,          "bsf"           },
+       { BSR,          "bsr"           },
+       { BSWAP,        "bswap"         },
+       { BT,           "bt"            },
+       { BTC,          "btc"           },
+       { BTR,          "btr"           },
+       { BTS,          "bts"           },
+       { CALL,         "call"          },
+       { CALLF,        "callf"         },
+       { CBW,          "cbw"           },
+       { CLC,          "clc"           },
+       { CLD,          "cld"           },
+       { CLI,          "cli"           },
+       { CLTS,         "clts"          },
+       { CMC,          "cmc"           },
+       { CMP,          "cmp%"          },
+       { CMPS,         "cmps%"         },
+       { CMPXCHG,      "cmpxchg"       },
+       { CWD,          "cwd"           },
+       { DAA,          "daa"           },
+       { DAS,          "das"           },
+       { DEC,          "dec%"          },
+       { DIV,          "div%"          },
+       { DOT_ALIGN,    ".align"        },
+       { DOT_ASCII,    ".ascii"        },
+       { DOT_ASCIZ,    ".asciz"        },
+       { DOT_ASSERT,   ".assert"       },
+       { DOT_BASE,     ".base"         },
+       { DOT_BSS,      ".sect .bss"    },
+       { DOT_COMM,     ".comm"         },
+       { DOT_DATA,     ".sect .data"   },
+       { DOT_DATA1,    ".data1"        },
+       { DOT_DATA2,    ".data2"        },
+       { DOT_DATA4,    ".data4"        },
+       { DOT_DEFINE,   ".define"       },
+       { DOT_END,      ".sect .end"    },
+       { DOT_EXTERN,   ".extern"       },
+       { DOT_FILE,     ".file"         },
+       { DOT_LCOMM,    ".comm"         },
+       { DOT_LINE,     ".line"         },
+       { DOT_LIST,     ".list"         },
+       { DOT_NOLIST,   ".nolist"       },
+       { DOT_ROM,      ".sect .rom"    },
+       { DOT_SPACE,    ".space"        },
+       { DOT_SYMB,     ".symb"         },
+       { DOT_TEXT,     ".sect .text"   },
+       { DOT_USE16,    ".use16"        },
+       { DOT_USE32,    ".use32"        },
+       { ENTER,        "enter"         },
+       { F2XM1,        "f2xm1"         },
+       { FABS,         "fabs"          },
+       { FADD,         "fadd"          },
+       { FADDD,        "faddd"         },
+       { FADDP,        "faddp"         },
+       { FADDS,        "fadds"         },
+       { FBLD,         "fbld"          },
+       { FBSTP,        "fbstp"         },
+       { FCHS,         "fchs"          },
+       { FCLEX,        "fclex"         },
+       { FCOMD,        "fcomd"         },
+       { FCOMPD,       "fcompd"        },
+       { FCOMPP,       "fcompp"        },
+       { FCOMPS,       "fcomps"        },
+       { FCOMS,        "fcoms"         },
+       { FCOS,         "fcos"          },
+       { FDECSTP,      "fdecstp"       },
+       { FDIVD,        "fdivd"         },
+       { FDIVP,        "fdivp"         },
+       { FDIVRD,       "fdivrd"        },
+       { FDIVRP,       "fdivrp"        },
+       { FDIVRS,       "fdivrs"        },
+       { FDIVS,        "fdivs"         },
+       { FFREE,        "ffree"         },
+       { FIADDL,       "fiaddl"        },
+       { FIADDS,       "fiadds"        },
+       { FICOM,        "ficom"         },
+       { FICOMP,       "ficomp"        },
+       { FIDIVL,       "fidivl"        },
+       { FIDIVRL,      "fidivrl"       },
+       { FIDIVRS,      "fidivrs"       },
+       { FIDIVS,       "fidivs"        },
+       { FILDL,        "fildl"         },
+       { FILDQ,        "fildq"         },
+       { FILDS,        "filds"         },
+       { FIMULL,       "fimull"        },
+       { FIMULS,       "fimuls"        },
+       { FINCSTP,      "fincstp"       },
+       { FINIT,        "finit"         },
+       { FISTL,        "fistl"         },
+       { FISTP,        "fistp"         },
+       { FISTS,        "fists"         },
+       { FISUBL,       "fisubl"        },
+       { FISUBRL,      "fisubrl"       },
+       { FISUBRS,      "fisubrs"       },
+       { FISUBS,       "fisubs"        },
+       { FLD1,         "fld1"          },
+       { FLDCW,        "fldcw"         },
+       { FLDD,         "fldd"          },
+       { FLDENV,       "fldenv"        },
+       { FLDL2E,       "fldl2e"        },
+       { FLDL2T,       "fldl2t"        },
+       { FLDLG2,       "fldlg2"        },
+       { FLDLN2,       "fldln2"        },
+       { FLDPI,        "fldpi"         },
+       { FLDS,         "flds"          },
+       { FLDX,         "fldx"          },
+       { FLDZ,         "fldz"          },
+       { FMULD,        "fmuld"         },
+       { FMULP,        "fmulp"         },
+       { FMULS,        "fmuls"         },
+       { FNOP,         "fnop"          },
+       { FPATAN,       "fpatan"        },
+       { FPREM,        "fprem"         },
+       { FPREM1,       "fprem1"        },
+       { FPTAN,        "fptan"         },
+       { FRNDINT,      "frndint"       },
+       { FRSTOR,       "frstor"        },
+       { FSAVE,        "fsave"         },
+       { FSCALE,       "fscale"        },
+       { FSIN,         "fsin"          },
+       { FSINCOS,      "fsincos"       },
+       { FSQRT,        "fsqrt"         },
+       { FSTCW,        "fstcw"         },
+       { FSTD,         "fstd"          },
+       { FSTENV,       "fstenv"        },
+       { FSTPD,        "fstpd"         },
+       { FSTPS,        "fstps"         },
+       { FSTPX,        "fstpx"         },
+       { FSTS,         "fsts"          },
+       { FSTSW,        "fstsw"         },
+       { FSUBD,        "fsubd"         },
+       { FSUBP,        "fsubp"         },
+       { FSUBPR,       "fsubpr"        },
+       { FSUBRD,       "fsubrd"        },
+       { FSUBRS,       "fsubrs"        },
+       { FSUBS,        "fsubs"         },
+       { FTST,         "ftst"          },
+       { FUCOM,        "fucom"         },
+       { FUCOMP,       "fucomp"        },
+       { FUCOMPP,      "fucompp"       },
+       { FXAM,         "fxam"          },
+       { FXCH,         "fxch"          },
+       { FXTRACT,      "fxtract"       },
+       { FYL2X,        "fyl2x"         },
+       { FYL2XP1,      "fyl2xp1"       },
+       { HLT,          "hlt"           },
+       { IDIV,         "idiv%"         },
+       { IMUL,         "imul%"         },
+       { IN,           "in%"           },
+       { INC,          "inc%"          },
+       { INS,          "ins%"          },
+       { INT,          "int"           },
+       { INTO,         "into"          },
+       { INVD,         "invd"          },
+       { INVLPG,       "invlpg"        },
+       { IRET,         "iret"          },
+       { IRETD,        "iretd"         },
+       { JA,           "ja"            },
+       { JAE,          "jae"           },
+       { JB,           "jb"            },
+       { JBE,          "jbe"           },
+       { JCXZ,         "jcxz"          },
+       { JE,           "je"            },
+       { JG,           "jg"            },
+       { JGE,          "jge"           },
+       { JL,           "jl"            },
+       { JLE,          "jle"           },
+       { JMP,          "jmp"           },
+       { JMPF,         "jmpf"          },
+       { JNE,          "jne"           },
+       { JNO,          "jno"           },
+       { JNP,          "jnp"           },
+       { JNS,          "jns"           },
+       { JO,           "jo"            },
+       { JP,           "jp"            },
+       { JS,           "js"            },
+       { LAHF,         "lahf"          },
+       { LAR,          "lar"           },
+       { LDS,          "lds"           },
+       { LEA,          "lea"           },
+       { LEAVE,        "leave"         },
+       { LES,          "les"           },
+       { LFS,          "lfs"           },
+       { LGDT,         "lgdt"          },
+       { LGS,          "lgs"           },
+       { LIDT,         "lidt"          },
+       { LLDT,         "lldt"          },
+       { LMSW,         "lmsw"          },
+       { LOCK,         "lock"          },
+       { LODS,         "lods%"         },
+       { LOOP,         "loop"          },
+       { LOOPE,        "loope"         },
+       { LOOPNE,       "loopne"        },
+       { LSL,          "lsl"           },
+       { LSS,          "lss"           },
+       { LTR,          "ltr"           },
+       { MOV,          "mov%"          },
+       { MOVS,         "movs%"         },
+       { MOVSX,        "movsx"         },
+       { MOVSXB,       "movsxb"        },
+       { MOVZX,        "movzx"         },
+       { MOVZXB,       "movzxb"        },
+       { MUL,          "mul%"          },
+       { NEG,          "neg%"          },
+       { NOP,          "nop"           },
+       { NOT,          "not%"          },
+       { OR,           "or%"           },
+       { OUT,          "out%"          },
+       { OUTS,         "outs%"         },
+       { POP,          "pop"           },
+       { POPA,         "popa"          },
+       { POPAD,        "popad"         },
+       { POPF,         "popf"          },
+       { PUSH,         "push"          },
+       { PUSHA,        "pusha"         },
+       { PUSHAD,       "pushad"        },
+       { PUSHF,        "pushf"         },
+       { RCL,          "rcl%"          },
+       { RCR,          "rcr%"          },
+       { RET,          "ret"           },
+       { RETF,         "retf"          },
+       { ROL,          "rol%"          },
+       { ROR,          "ror%"          },
+       { SAHF,         "sahf"          },
+       { SAL,          "sal%"          },
+       { SAR,          "sar%"          },
+       { SBB,          "sbb%"          },
+       { SCAS,         "scas%"         },
+       { SETA,         "seta"          },
+       { SETAE,        "setae"         },
+       { SETB,         "setb"          },
+       { SETBE,        "setbe"         },
+       { SETE,         "sete"          },
+       { SETG,         "setg"          },
+       { SETGE,        "setge"         },
+       { SETL,         "setl"          },
+       { SETLE,        "setle"         },
+       { SETNE,        "setne"         },
+       { SETNO,        "setno"         },
+       { SETNP,        "setnp"         },
+       { SETNS,        "setns"         },
+       { SETO,         "seto"          },
+       { SETP,         "setp"          },
+       { SETS,         "sets"          },
+       { SGDT,         "sgdt"          },
+       { SHL,          "shl%"          },
+       { SHLD,         "shld"          },
+       { SHR,          "shr%"          },
+       { SHRD,         "shrd"          },
+       { SIDT,         "sidt"          },
+       { SLDT,         "sldt"          },
+       { SMSW,         "smsw"          },
+       { STC,          "stc"           },
+       { STD,          "std"           },
+       { STI,          "sti"           },
+       { STOS,         "stos%"         },
+       { STR,          "str"           },
+       { SUB,          "sub%"          },
+       { TEST,         "test%"         },
+       { VERR,         "verr"          },
+       { VERW,         "verw"          },
+       { WAIT,         "wait"          },
+       { WBINVD,       "wbinvd"        },
+       { XADD,         "xadd"          },
+       { XCHG,         "xchg%"         },
+       { XLAT,         "xlat"          },
+       { XOR,          "xor%"          },
+};
+
+/* True for the far jump and far call opcodes (JMPF and CALLF). */
+#define farjmp(o)      ((o) == JMPF || (o) == CALLF)
+
+/* State of the emitter, set up by ack_emit_init(). */
+static FILE *ef;               /* Output stream. */
+static long eline= 1;          /* Line number the output is in step with,
+                                * see ack_emit_instruction().
+                                */
+static char *efile;            /* Starts as the output name; replaced by the
+                                * input file name when a "! line file"
+                                * marker is emitted.
+                                */
+static char *orig_efile;       /* Output name, passed to fatal() on write
+                                * errors.
+                                */
+static char *opcode2name_tab[N_OPCODES];       /* Opcode -> mnemonic. */
+/* Output dialect.  Nothing in this part of the file changes it from ACK. */
+static enum dialect { ACK, NCC } dialect= ACK;
+
+static void ack_putchar(int c)
+/* Write one character to the output file.  The result of putc() is checked:
+ * on EOF fatal() is called with the name of the output file.
+ */
+{
+       int result= putc(c, ef);
+
+       if (result == EOF) {
+               fatal(orig_efile);
+       }
+}
+
+static void ack_printf(const char *fmt, ...)
+/* Formatted output to the output file.  vfprintf() reports an error with a
+ * negative value, which need not be EOF, so any negative result is treated
+ * as a write error and handed to fatal() with the output file name.  The
+ * argument list is closed with va_end() before fatal() is called, so that
+ * va_start() and va_end() are paired on every path.
+ */
+{
+       va_list ap;
+       int result;
+
+       va_start(ap, fmt);
+       result= vfprintf(ef, fmt, ap);
+       va_end(ap);
+
+       if (result < 0) fatal(orig_efile);
+}
+
+void ack_emit_init(char *file, const char *banner)
+/* Start an ACK assembly output file.  With a nil 'file' the output goes to
+ * stdout, otherwise the named file is opened for writing and fatal() is
+ * called if that fails.  The banner is written as a comment, followed in the
+ * ACK dialect by the section declarations, and the opcode to mnemonic lookup
+ * table is filled in from mnemtab.
+ */
+{
+       size_t i;
+
+       if (file != nil) {
+               ef= fopen(file, "w");
+               if (ef == nil) fatal(file);
+       } else {
+               ef= stdout;
+               file= "stdout";
+       }
+       efile= file;
+       orig_efile= file;
+
+       ack_printf("! %s", banner);
+       if (dialect == ACK) {
+               /* Declare the four sections used under Minix. */
+               ack_printf(
+       "\n.sect .text; .sect .rom; .sect .data; .sect .bss\n.sect .text");
+       }
+
+       /* Fill the lookup table; an opcode may occur only once in mnemtab. */
+       for (i= 0; i < arraysize(mnemtab); i++) {
+               opcode_t op= mnemtab[i].opcode;
+
+               assert(opcode2name_tab[op] == nil);
+               opcode2name_tab[op]= mnemtab[i].name;
+       }
+}
+
+/* Mnemonic for an opcode, as filled in by ack_emit_init().  The "+ 0" makes
+ * the expansion a value that cannot be assigned to.
+ */
+#define opcode2name(op)                (opcode2name_tab[op] + 0)
+
+static void ack_put_string(const char *s, size_t n)
+/* Emit the n characters at s as the inside of a quoted string.  A double
+ * quote or backslash is preceded by a backslash, a character below space or
+ * above 0177 is written as a backslash and three octal digits, and all
+ * other characters (0177 itself included) are written as they are.
+ */
+{
+       for (; n > 0; n--, s++) {
+               int c= *s;
+
+               if (c == '"' || c == '\\') {
+                       ack_printf("\\%c", c);
+                       continue;
+               }
+               if (c >= ' ' && c <= 0177) {
+                       ack_putchar(c);
+                       continue;
+               }
+               ack_printf("\\%03o", c & 0xFF);
+       }
+}
+
+static void ack_put_expression(asm86_t *a, expression_t *e, int deref)
+/* Send an expression, i.e. instruction operands, to the output file.  Deref
+ * is true when the rewrite for the ncc dialect may be made.
+ *
+ * 'a' is the instruction the operands belong to; its opcode and optype
+ * decide how far jumps and jump targets are written.  'e' must not be nil.
+ * The function recurses over the tree and passes deref on only for the two
+ * halves of an operand list (','); all other sub-expressions are emitted
+ * with deref 0.  An unknown operator ends the program with EXIT_FAILURE.
+ */
+{
+       int isglob;
+
+       assert(e != nil);
+
+       /* Names that are global get a '_' prefix in the output.
+        * NOTE(review): e->name is nil for cells without a name (new_expr()
+        * sets it to nil), so syms_is_global() must cope with nil -- confirm.
+        */
+       isglob = syms_is_global(e->name);
+
+       switch (e->operator) {
+       case ',':
+               /* Operand list: left operand, separator, rest of the list. */
+               if (dialect == NCC && farjmp(a->opcode)) {
+                       /* ACK jmpf seg:off  ->  NCC jmpf off,seg */
+                       ack_put_expression(a, e->right, deref);
+                       ack_printf(", ");
+                       ack_put_expression(a, e->left, deref);
+               } else {
+                       ack_put_expression(a, e->left, deref);
+                       ack_printf(farjmp(a->opcode) ? ":" : ", ");
+                       ack_put_expression(a, e->right, deref);
+               }
+               break;
+       case 'O':
+               /* offset(base)(index*n): the parts present are written one
+                * after the other.
+                */
+               if (deref && a->optype == JUMP) ack_putchar('@');
+               if (e->left != nil) ack_put_expression(a, e->left, 0);
+               if (e->middle != nil) ack_put_expression(a, e->middle, 0);
+               if (e->right != nil) ack_put_expression(a, e->right, 0);
+               break;
+       case '(':
+               /* (memory): the parentheses are left out when deref is set. */
+               if (deref && a->optype == JUMP) ack_putchar('@');
+               if (!deref) ack_putchar('(');
+               ack_put_expression(a, e->middle, 0);
+               if (!deref) ack_putchar(')');
+               break;
+       case 'B':
+               /* Base: the name in parentheses. */
+               ack_printf("(%s%s)", isglob ? "_" : "", e->name);
+               break;
+       case '1':
+       case '2':
+       case '4':
+       case '8':
+               /* Index with scale; the operator character is the scale.  In
+                * 16 bit mode a scale of 1 is not written out.
+                */
+               ack_printf((use16() && e->operator == '1')
+                               ? "(%s%s)" : "(%s%s*%c)", isglob ? "_" : "",
+                               e->name, e->operator);
+               break;
+       case '+':
+       case '-':
+       case '~':
+               /* With a middle operand these are unary operators, otherwise
+                * they are handled as binary operators below.
+                */
+               if (e->middle != nil) {
+                       if (deref && a->optype != JUMP) ack_putchar('#');
+                       ack_putchar(e->operator);
+                       ack_put_expression(a, e->middle, 0);
+                       break;
+               }
+               /*FALL THROUGH*/
+       case '*':
+       case '/':
+       case '%':
+       case '&':
+       case '|':
+       case '^':
+       case S_LEFTSHIFT:
+       case S_RIGHTSHIFT:
+               /* Binary operator: left operand, operator, right operand. */
+               if (deref && a->optype != JUMP) ack_putchar('#');
+               ack_put_expression(a, e->left, 0);
+               if (e->operator == S_LEFTSHIFT) {
+                       ack_printf("<<");
+               } else
+               if (e->operator == S_RIGHTSHIFT) {
+                       ack_printf(">>");
+               } else {
+                       ack_putchar(e->operator);
+               }
+               ack_put_expression(a, e->right, 0);
+               break;
+       case '[':
+               /* [expr]: a bracketed subexpression. */
+               if (deref && a->optype != JUMP) ack_putchar('#');
+               ack_putchar('[');
+               ack_put_expression(a, e->middle, 0);
+               ack_putchar(']');
+               break;
+       case 'W':
+               /* A word.  With deref set, a register used as a jump operand
+                * is put in parentheses (without the '_' prefix), and in a
+                * non-jump instruction a word that is not a register gets a
+                * '#' in front.
+                */
+               if (deref && a->optype == JUMP && isregister(e->name))
+               {
+                       ack_printf("(%s)", e->name);
+                       break;
+               }
+               if (deref && a->optype != JUMP && !isregister(e->name)) {
+                       ack_putchar('#');
+               }
+               ack_printf("%s%s", isglob ? "_" : "",  e->name);
+               break;
+       case 'S':
+               /* A string of e->len characters, quoted. */
+               ack_putchar('"');
+               ack_put_string(e->name, e->len);
+               ack_putchar('"');
+               break;
+       default:
+               /* NOTE(review): the message names "asmconv" although this
+                * file is part of gas2ack.
+                */
+               fprintf(stderr,
+               "asmconv: internal error, unknown expression operator '%d'\n",
+                       e->operator);
+               exit(EXIT_FAILURE);
+       }
+}
+
+void ack_emit_instruction(asm86_t *a)
+/* Output one instruction and its operands.
+ *
+ * a == nil marks the last call and only terminates the current output line.
+ * Otherwise the output position is first brought in line with a->file and
+ * a->line, and then the instruction is written: comments, preprocessor lines
+ * and unknown text verbatim from a->raw_string, labels and '=' definitions in
+ * their own form, everything else as optional prefixes, mnemonic and operands.
+ *
+ * The call modifies *a: operands separated by ',' are swapped in place
+ * (except for JMPF) and the first character of a->raw_string may be
+ * overwritten with '!'.
+ */
+{
+       /* Set when this instruction continues the previous output line. */
+       int same= 0;
+       char *p;
+       /* NOTE(review): high_seg is not referenced anywhere in this function. */
+       static int high_seg;
+       int deref;
+       /* DOT_LABEL if the previous call emitted a label, 0 otherwise. */
+       static int prevop;
+
+       if (a == nil) {
+               /* Last call */
+               ack_putchar('\n');
+               return;
+       }
+
+       /* Make sure the line number of the line to be emitted is ok. */
+       /* A different file, a line before the current one, or a gap of more
+        * than 10 lines restarts the position with a '! line "file"' line;
+        * smaller gaps are filled with newlines instead.
+        */
+       if ((a->file != efile && strcmp(a->file, efile) != 0)
+                               || a->line < eline || a->line > eline+10) {
+               ack_putchar('\n');
+               ack_printf("! %ld \"%s\"\n", a->line, a->file);
+               efile= a->file;
+               eline= a->line;
+       } else {
+               if (a->line == eline) {
+                       /* Second item on one source line: start a new output
+                        * line after a label, otherwise separate with "; ".
+                        */
+                       if (prevop == DOT_LABEL) {
+                               ack_printf("\n");
+                               same = 0;
+                       }
+                       else {
+                               ack_printf("; ");
+                               same= 1;
+                       }
+               }
+               while (eline < a->line) {
+                       ack_putchar('\n');
+                       eline++;
+               }
+       }
+       
+       /* Remember for the next call whether a label was just emitted. */
+       if (a->opcode == DOT_LABEL)
+               prevop = DOT_LABEL;
+       else
+               prevop = 0;
+
+       if (a->opcode == COMMENT || 
+                       a->opcode == C_PREPROCESSOR ||
+                       a->opcode == UNKNOWN) {
+               /* Passed through as is; only a comment that does not start
+                * with '/' gets its first character replaced by '!'.
+                */
+               if (a->opcode == COMMENT)
+                       if (a->raw_string[0] != '/')
+                               a->raw_string[0] = '!';
+               ack_printf("%s", a->raw_string);
+               return;
+       } else
+       if (a->opcode == DOT_LABEL) {
+               assert(a->args->operator == ':');
+               /* Labels that syms_is_global() reports as global get a '_'. */
+               if (syms_is_global(a->args->name))
+                       ack_printf("_%s:", a->args->name);
+               else
+                       ack_printf("%s:", a->args->name);
+       } else
+       if (a->opcode == DOT_EQU) {
+               assert(a->args->operator == '=');
+               ack_printf("\t%s = ", a->args->name);
+               ack_put_expression(a, a->args->middle, 0);
+       } else
+       if ((p= opcode2name(a->opcode)) != nil) {
+               /* NOTE(review): sep is assigned but never used below. */
+               char *sep= dialect == ACK ? "" : ";";
+
+               if (!is_pseudo(a->opcode) && !same) ack_putchar('\t');
+
+               /* Repeat prefix, followed by " " for ACK and "; " otherwise. */
+               switch (a->rep) {
+               case ONCE:      break;
+               case REP:       ack_printf("rep");      break;
+               case REPE:      ack_printf("repe");     break;
+               case REPNE:     ack_printf("repne");    break;
+               default:        assert(0);
+               }
+               if (a->rep != ONCE) {
+                       ack_printf(dialect == ACK ? " " : "; ");
+               }
+               /* Segment override prefix, with the same separator rule. */
+               switch (a->seg) {
+               case DEFSEG:    break;
+               case CSEG:      ack_printf("cseg");     break;
+               case DSEG:      ack_printf("dseg");     break;
+               case ESEG:      ack_printf("eseg");     break;
+               case FSEG:      ack_printf("fseg");     break;
+               case GSEG:      ack_printf("gseg");     break;
+               case SSEG:      ack_printf("sseg");     break;
+               default:        assert(0);
+               }
+               if (a->seg != DEFSEG) {
+                       ack_printf(dialect == ACK ? " " : "; ");
+               }
+               /* Size prefixes name the size opposite to the current mode. */
+               if (a->oaz & OPZ) ack_printf(use16() ? "o32 " : "o16 ");
+               if (a->oaz & ADZ) ack_printf(use16() ? "a32 " : "a16 ");
+
+               /* CBW and CWD each have two spellings; which one is used
+                * depends on the OPZ flag and on use16().
+                */
+               if (a->opcode == CBW) {
+                       p= !(a->oaz & OPZ) == use16() ? "cbw" : "cwde";
+               }
+
+               if (a->opcode == CWD) {
+                       p= !(a->oaz & OPZ) == use16() ? "cwd" : "cdq";
+               }
+
+               /* A .comm whose first operand is a plain word is preceded by
+                * a ".define" of that name, '_'-prefixed when global.
+                */
+               if (a->opcode == DOT_COMM && a->args != nil
+                       && a->args->operator == ','
+                       && a->args->left->operator == 'W'
+               ) {
+                       if (syms_is_global(a->args->left->name))
+                               ack_printf(".define\t_%s; ", a->args->left->name);
+                       else
+                               ack_printf(".define\t%s; ", a->args->left->name);
+               }
+#define IS_OP_CR4(r)   ((r) && (r)->name && strcmp((r)->name, "cr4") == 0)
+               /* unsupported mov to/from cr4 */
+               /* The instruction is emitted as fixed bytes; the operand
+                * opposite to cr4 is not examined.
+                * NOTE(review): both branches return before the trailing
+                * comment code at the end of this function, and the "\n"
+                * they print is not counted in eline.
+                */
+               if (a->opcode == MOV && a->args->operator == ',') {
+                       if (IS_OP_CR4(a->args->left)) {
+                               /* read cr4 */
+                               ack_printf(".data1  0x0f, 0x20, 0xe0\n");
+                               return;
+                       }
+                       if (IS_OP_CR4(a->args->right)) {
+                               /* write cr4 */
+                               ack_printf(".data1  0x0f, 0x22, 0xe0\n");
+                               return;
+                       }
+               }
+               /* we are translating from GNU */
+               /* so the two operands are exchanged, in place, in a->args */
+               if (a->args && a->args->operator == ','
+                               /* don't swap ljmp prefixed with segment */
+                               && a->opcode != JMPF) {
+                       expression_t * tmp;
+
+                       tmp = a->args->right;
+                       a->args->right = a->args->left;
+                       a->args->left = tmp;
+               }
+               /* Print the mnemonic; a '%' in the name becomes 'b' for byte
+                * sized operations and is dropped otherwise.
+                */
+               while (*p != 0) {
+                       if (*p == '%') {
+                               if (a->optype == BYTE) ack_putchar('b');
+                       } else {
+                               ack_putchar(*p);
+                       }
+                       p++;
+               }
+               /* 
+                * gnu assembly expresses the ES segment in the long instruction
+                * format. Not necessary in ack
+                */
+               /* NOTE(review): this return also bypasses the trailing
+                * comment code at the end of this function.
+                */
+               if (a->opcode == MOVS)
+                       return;
+               if (a->args != nil) {
+                       ack_putchar('\t');
+                       /* deref is passed on to ack_put_expression(); it is
+                        * only set for the NCC dialect on non-pseudo
+                        * instructions, and never for IN, OUT and INT.
+                        */
+                       switch (a->opcode) {
+                       case IN:
+                       case OUT:
+                       case INT:
+                               deref= 0;
+                               break;
+                       default:
+                               deref= (dialect == NCC && a->optype != PSEUDO);
+                       }
+                       ack_put_expression(a, a->args, deref);
+               }
+               /* Mode switches take effect for the instructions that follow. */
+               if (a->opcode == DOT_USE16) set_use16();
+               if (a->opcode == DOT_USE32) set_use32();
+       } else {
+               fprintf(stderr,
+                       "asmconv: internal error, unknown opcode '%d'\n",
+                       a->opcode);
+               exit(EXIT_FAILURE);
+       }
+       
+       /* 
+        * comment on the same line as an instruction. Cannot be definition of a
+        * macro
+        * */
+       /* As above, the first character becomes '!' unless it is '/'. */
+       if (a->raw_string) {
+               if (a->raw_string[0] != '/')
+                       a->raw_string[0] = '!';
+               ack_printf("\t%s", a->raw_string);
+       }
+}
+
+/* A few ncc mnemonics are different.  Each entry pairs an opcode with the
+ * name that ncc_emit_init() stores into opcode2name_tab[] for that opcode.
+ */
+static mnemonic_t ncc_mnemtab[] = {
+       { DOT_BSS,      ".bss"          },
+       { DOT_DATA,     ".data"         },
+       { DOT_END,      ".end"          },
+       { DOT_ROM,      ".rom"          },
+       { DOT_TEXT,     ".text"         },
+};
+
+void ncc_emit_init(char *file, const char *banner)
+/* Prepare the emitter for the NCC dialect.  The Minix ACK ANSI C compiler
+ * for the 8086 produces assembly that differs both from normal ACK assembly
+ * and from the old K&R assembler, so after the regular ACK initialisation
+ * the mnemonics listed in ncc_mnemtab replace their ACK counterparts.
+ */
+{
+       mnemonic_t *entry= ncc_mnemtab;
+
+       dialect= NCC;
+       ack_emit_init(file, banner);
+
+       /* Walk the override table and patch the opcode -> name mapping. */
+       while (entry < arraylimit(ncc_mnemtab)) {
+               opcode2name_tab[entry->opcode]= entry->name;
+               entry++;
+       }
+}
+
+void ncc_emit_instruction(asm86_t *a)
+/* NCC output is produced by the ACK emitter; ack_emit_instruction() looks
+ * at 'dialect', which ncc_emit_init() sets to NCC.
+ */
+{
+       ack_emit_instruction(a);
+}
diff --git a/commands/i386/gas2ack/gas2ack.c b/commands/i386/gas2ack/gas2ack.c
new file mode 100644 (file)
index 0000000..422e50a
--- /dev/null
@@ -0,0 +1,116 @@
+/*     asmconv 1.11 - convert 80X86 assembly           Author: Kees J. Bot
+ *                                                             24 Dec 1993
+ */
+/* Version string matching the asmconv header above; it is not referenced
+ * anywhere else in this file.
+ */
+static char version[] = "1.11";
+
+#define nil 0
+#include <stdio.h>
+#include <stdarg.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <assert.h>
+#include "asmconv.h"
+#include "asm86.h"
+#include "languages.h"
+
+void fatal(char *label)
+/* Print 'label' and the message for the current errno on standard error,
+ * then terminate the program with EXIT_FAILURE.  Does not return.
+ */
+{
+       char *reason= strerror(errno);
+
+       fprintf(stderr, "asmconv: %s: %s\n", label, reason);
+       exit(EXIT_FAILURE);
+}
+
+void *allocate(void *mem, size_t size)
+/* Checked allocation: a nil 'mem' asks for a new cell of 'size' bytes, any
+ * other 'mem' is resized to 'size' bytes.  Never returns nil; a failed
+ * allocation ends the program through fatal().
+ */
+{
+       void *cell;
+
+       if (mem == nil) {
+               cell= malloc(size);
+       } else {
+               cell= realloc(mem, size);
+       }
+       if (cell == nil) fatal("malloc()");
+       return cell;
+}
+
+void deallocate(void *mem)
+/* Release a cell obtained from allocate(); a nil pointer is ignored. */
+{
+       if (mem == nil) return;
+       free(mem);
+}
+
+char *copystr(const char *s)
+/* Return a freshly allocated copy of the string s, terminator included. */
+{
+       size_t bytes= (strlen(s) + 1) * sizeof(s[0]);
+       char *copy= allocate(nil, bytes);
+
+       memcpy(copy, s, bytes);
+       return copy;
+}
+
+int isanumber(const char *s)
+/* Return 1 if strtol(), with the base taken from the prefix, converts
+ * something and consumes s up to its end, 0 otherwise.
+ */
+{
+       char *stop;
+
+       (void) strtol(s, &stop, 0);
+       if (stop == s) return 0;        /* nothing was converted */
+       return *stop == 0;
+}
+
+/* "Invisible" globals. */
+/* Starts out nonzero when int is four bytes wide where this tool is built. */
+int asm_mode32= (sizeof(int) == 4);
+/* Exit status that main() passes to exit() when the translation is done. */
+int err_code= EXIT_SUCCESS;
+
+int main(int argc, char **argv)
+/* gas2ack [-mi86|-mi386] [input-file [output-file]]
+ *
+ * Translates GNU assembly to ACK assembly.  The input is parsed twice: the
+ * first pass only runs the parser over the whole input so that the labels
+ * defined in the file are known, the second pass emits the translation.
+ */
+{
+       char *input_file= nil, *output_file= nil;
+       asm86_t *instr;
+
+       /* Optional machine selection; an unknown machine is reported but
+        * otherwise ignored.
+        */
+       if (argc > 1 && argv[1][0] == '-' && argv[1][1] == 'm') {
+               char *machine= argv[1] + 2;
+
+               if (strcmp(machine, "i86") == 0) {
+                       set_use16();
+               } else if (strcmp(machine, "i386") == 0) {
+                       set_use32();
+               } else {
+                       fprintf(stderr, "asmconv: '%s': unknown machine\n",
+                               machine);
+               }
+               argc--;
+               argv++;
+       }
+
+       if (argc > 3) {
+               fprintf(stderr, "Usage: gas2ack [input-file [output-file]]\n");
+               exit(EXIT_FAILURE);
+       }
+
+       /* Missing file names stay nil. */
+       if (argc >= 2) input_file= argv[1];
+       if (argc >= 3) output_file= argv[2];
+
+       /* Pass one: collect the locally defined labels.
+        * NOTE(review): the input is opened a second time below; if a nil
+        * input_file means standard input, the second pass may find nothing
+        * left to read -- confirm against gnu_parse_init().
+        */
+       gnu_parse_init(input_file);
+       while ((instr= gnu_get_instruction()) != nil) {
+               del_asm86(instr);
+       }
+
+       /* Pass two: translate.  The emitter is also called once with nil so
+        * that it can finish its output.
+        */
+       gnu_parse_init(input_file);
+       ack_emit_init(output_file, "Translated from GNU to ACK by gas2ack");
+       do {
+               instr= gnu_get_instruction();
+               ack_emit_instruction(instr);
+               if (instr != nil) del_asm86(instr);
+       } while (instr != nil);
+
+       exit(err_code);
+}
diff --git a/commands/i386/gas2ack/globals.c b/commands/i386/gas2ack/globals.c
new file mode 100644 (file)
index 0000000..80f105f
--- /dev/null
@@ -0,0 +1,134 @@
+/* 
+ * Table of all global definitions. Since the ack convention is to prepend
+ * syms with '_' for C interfacing, we need to know about them and add/remove
+ * the '_' as necessary
+ */
+
+#include <errno.h>
+#include <string.h>
+#include <ctype.h>
+#include <stdio.h>
+
+#include "asm86.h"
+
+/* this should be fine for common minix assembly files */
+/* SYM_MAX is the number of table slots, SYM_MAX_LEN the size of the name
+ * buffer in each slot (terminating NUL included).
+ */
+#define SYM_MAX                1024
+#define SYM_MAX_LEN    64
+
+/* One recorded symbol. */
+struct sym {
+       char    name[SYM_MAX_LEN];      /* NUL terminated symbol name */
+       int     gl;                     /* 1 if added as global, 0 if not */
+};
+
+/* The table itself; only the first syms_num slots are in use. */
+static struct sym syms[SYM_MAX];
+
+static int syms_num = 0;
+
+/* Look up n among the recorded symbols.  Returns the first entry with that
+ * name, or NULL if there is none.
+ */
+static struct sym * sym_exists(const char * n)
+{
+       struct sym *cur = syms;
+       struct sym *end = syms + syms_num;
+
+       while (cur < end) {
+               if (strcmp(cur->name, n) == 0)
+                       return cur;
+               cur++;
+       }
+
+       return NULL;
+}
+
+/* Return 1 if n is a reference to a numeric local label, i.e. one or more
+ * decimal digits followed by a single 'b' or 'f' ("1b", "12f"), else 0.
+ * n must not be NULL.
+ *
+ * At least one digit is required: the empty string and a bare "b" or "f"
+ * are not label references.
+ */
+static int is_local_label_ref(const char *n)
+{
+       size_t i;
+       size_t l = strlen(n);
+
+       /* need a digit and the direction letter */
+       if (l < 2)
+               return 0;
+
+       /* <ctype.h> functions want an unsigned char value */
+       for (i = 0; i < l - 1; i++)
+               if (!isdigit((unsigned char) n[i]))
+                       return 0;
+
+       return n[l-1] == 'b' || n[l-1] == 'f';
+}
+
+/* Return 1 if every character of n is a hexadecimal digit, else 0.  The
+ * empty string has no offending character and therefore yields 1.
+ */
+static int is_hex(const char *n)
+{
+       /* <ctype.h> functions want an unsigned char value */
+       for (; *n; n++)
+               if (!isxdigit((unsigned char) *n))
+                       return 0;
+       return 1;
+}
+
+/* Return 1 if every character of n is a decimal digit, else 0.  The empty
+ * string has no offending character and therefore yields 1.
+ */
+static int is_dec(const char *n)
+{
+       /* <ctype.h> functions want an unsigned char value */
+       for (; *n; n++)
+               if (!isdigit((unsigned char) *n))
+                       return 0;
+       return 1;
+}
+
+/* Return 1 if n is a number: hexadecimal digits after a "0x" prefix, or
+ * decimal digits only.  What counts as digits is decided by is_hex() and
+ * is_dec().
+ */
+static int is_number(const char * n)
+{
+       int hex_prefix = (n[0] == '0' && n[1] == 'x');
+
+       return hex_prefix ? is_hex(n + 2) : is_dec(n);
+}
+
+/* Decide whether the name n refers to a global symbol.  NULL, numbers,
+ * local label references and register names are never global.  A name that
+ * was never recorded is taken to be external and therefore global; for a
+ * recorded name the stored flag is returned.
+ */
+int syms_is_global(const char * n)
+{
+       struct sym *s;
+
+       if (!n)
+               return 0;
+       if (is_number(n) || is_local_label_ref(n) || isregister(n))
+               return 0;
+
+       s = sym_exists(n);
+
+       /* if not found, it must be extern -> global */
+       return s ? s->gl : 1;
+}
+
+/* Record the symbol n, marked global when isgl is nonzero.
+ *
+ * Returns 0 on success, and also when n is a number or is already in the
+ * table (both are silently skipped).  Returns -EINVAL if n is NULL or does
+ * not fit in SYM_MAX_LEN, and -ENOMEM if the table is full.
+ *
+ * A name that is already present keeps its first entry; sym_exists() only
+ * ever returns the first match, so skipping the duplicate does not change
+ * any lookup.  It keeps repeated parsing of the same input from using up
+ * the table.
+ */
+static int add(const char * n, int isgl)
+{
+       if (!n || strlen(n) >= SYM_MAX_LEN)
+               return -EINVAL;
+
+       /* ignore numbers */
+       if (is_number(n))
+               return 0;
+
+       /* already known, the first entry wins */
+       if (sym_exists(n))
+               return 0;
+
+       if (syms_num >= SYM_MAX)
+               return -ENOMEM;
+
+       /* the length was checked above, so the name fits */
+       strcpy(syms[syms_num].name, n);
+       syms[syms_num].gl = isgl;
+       syms_num++;
+
+       return 0;
+}
+
+/* Record n as a non-global symbol; returns what add() returns. */
+int syms_add(const char *n)
+{
+       return add(n, 0);
+}
+
+/* Record n as a global symbol; returns what add() returns. */
+int syms_add_global(const char *n)
+{
+       return add(n, 1);
+}
+
+/* Record every name of a comma separated list as global.  The list is an
+ * expression tree in which ',' nodes join sub-lists; every other node
+ * contributes its name.  A NULL expression is ignored, and the result of
+ * syms_add_global() is not checked.
+ */
+void syms_add_global_csl(expression_t * exp)
+{
+       if (!exp)
+               return;
+
+       if (exp->operator != ',') {
+               syms_add_global(exp->name);
+               return;
+       }
+
+       syms_add_global_csl(exp->left);
+       syms_add_global_csl(exp->right);
+}
+
diff --git a/commands/i386/gas2ack/globals.h b/commands/i386/gas2ack/globals.h
new file mode 100644 (file)
index 0000000..e3e8d6e
--- /dev/null
@@ -0,0 +1,9 @@
+#ifndef __GLOBALS_H__
+#define __GLOBALS_H__
+
+/* Interface to the symbol table in globals.c.  This header uses
+ * expression_t without including anything itself, so the header that
+ * declares it has to be included first.
+ */
+
+/* Nonzero if gl has to be treated as a global symbol. */
+int syms_is_global(const char * gl);
+/* Record gl as a non-global symbol; 0 or a negative errno value. */
+int syms_add(const char * gl);
+/* Record gl as a global symbol; 0 or a negative errno value. */
+int syms_add_global(const char * gl);
+/* Record all names of a comma separated list as global symbols. */
+void syms_add_global_csl(expression_t * exp);
+
+#endif
diff --git a/commands/i386/gas2ack/languages.h b/commands/i386/gas2ack/languages.h
new file mode 100644 (file)
index 0000000..a34ec9f
--- /dev/null
@@ -0,0 +1,25 @@
+/*     languages.h - functions that parse or emit assembly
+ *                                                     Author: Kees J. Bot
+ *                                                             27 Dec 1993
+ */
+
+/* Parsers, one pair per input dialect.  As used by main() in gas2ack.c,
+ * the init function is called with the input file name (or nil) and the
+ * get function is then called repeatedly until it returns nil.
+ */
+void ack_parse_init(char *file);
+asm86_t *ack_get_instruction(void);
+
+void ncc_parse_init(char *file);
+asm86_t *ncc_get_instruction(void);
+
+void gnu_parse_init(char *file);
+asm86_t *gnu_get_instruction(void);
+
+void bas_parse_init(char *file);
+asm86_t *bas_get_instruction(void);
+
+/* Emitters, one pair per output dialect.  As used by main() in gas2ack.c,
+ * the init function gets the output file name (or nil) and a banner text,
+ * and the emit function gets each instruction in turn and finally nil.
+ */
+void ack_emit_init(char *file, const char *banner);
+void ack_emit_instruction(asm86_t *instr);
+
+void ncc_emit_init(char *file, const char *banner);
+void ncc_emit_instruction(asm86_t *instr);
+
+void gnu_emit_init(char *file, const char *banner);
+void gnu_emit_instruction(asm86_t *instr);
diff --git a/commands/i386/gas2ack/parse_gnu.c b/commands/i386/gas2ack/parse_gnu.c
new file mode 100644 (file)
index 0000000..56960b5
--- /dev/null
@@ -0,0 +1,1017 @@
+/*     parse_gnu.c - parse GNU assembly                Author: R.S. Veldema
+ *                                                      <rveldema@cs.vu.nl>
+ *                                                             26 Aug 1996
+ */
+#define nil 0
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <limits.h>
+#include <assert.h>
+#include "asmconv.h"
+#include "token.h"
+#include "asm86.h"
+#include "languages.h"
+#include "globals.h"
+
+typedef struct mnemonic {      /* GNU as86 mnemonics translation table. */
+       char            *name;          /* mnemonic as written in the input */
+       opcode_t        opcode;         /* opcode the mnemonic stands for */
+       optype_t        optype;         /* operand type: PSEUDO, BYTE, WORD, OWORD, JUMP, ... */
+} mnemonic_t;
+
+static mnemonic_t mnemtab[] = {                        /* This array is sorted. */
+       { ".align",     DOT_ALIGN,      PSEUDO },
+       { ".ascii",     DOT_ASCII,      PSEUDO },
+       { ".asciz",     DOT_ASCIZ,      PSEUDO },
+       { ".assert",    DOT_ASSERT,     PSEUDO },
+       { ".balign",    DOT_ALIGN,      PSEUDO },
+       { ".base",      DOT_BASE,       PSEUDO },
+       { ".bss",       DOT_BSS,        PSEUDO },
+       { ".byte",      DOT_DATA1,      PSEUDO },
+       { ".comm",      DOT_COMM,       PSEUDO },
+       { ".data",      DOT_DATA,       PSEUDO },
+       { ".data1",     DOT_DATA1,      PSEUDO },
+       { ".data2",     DOT_DATA2,      PSEUDO },
+       { ".data4",     DOT_DATA4,      PSEUDO },
+       { ".end",       DOT_END,        PSEUDO },
+       { ".extern",    DOT_EXTERN,     PSEUDO },
+       { ".file",      DOT_FILE,       PSEUDO },
+       { ".global",    DOT_DEFINE,     PSEUDO },
+       { ".globl",     DOT_DEFINE,     PSEUDO },
+       { ".lcomm",     DOT_LCOMM,      PSEUDO },
+       { ".line",      DOT_LINE,       PSEUDO },
+       { ".list",      DOT_LIST,       PSEUDO },
+       { ".long",      DOT_DATA4,      PSEUDO },
+       { ".nolist",    DOT_NOLIST,     PSEUDO },
+       { ".rom",       DOT_ROM,        PSEUDO },
+       { ".short",     DOT_DATA2,      PSEUDO },
+       { ".space",     DOT_SPACE,      PSEUDO },
+       { ".symb",      DOT_SYMB,       PSEUDO },
+       { ".text",      DOT_TEXT,       PSEUDO },
+       { ".word",      DOT_DATA2,      PSEUDO },
+       { "aaa",        AAA,            WORD },
+       { "aad",        AAD,            WORD },
+       { "aam",        AAM,            WORD },
+       { "aas",        AAS,            WORD },
+       { "adcb",       ADC,            BYTE },
+       { "adcl",       ADC,            WORD },
+       { "adcw",       ADC,            OWORD },
+       { "addb",       ADD,            BYTE },
+       { "addl",       ADD,            WORD },
+       { "addw",       ADD,            OWORD },
+       { "andb",       AND,            BYTE },
+       { "andl",       AND,            WORD },
+       { "andw",       AND,            OWORD },
+       { "arpl",       ARPL,           WORD },
+       { "bound",      BOUND,          WORD },
+       { "bsf",        BSF,            WORD },
+       { "bsr",        BSR,            WORD },
+       { "bswap",      BSWAP,          WORD },
+       { "btc",        BTC,            WORD },
+       { "btl",        BT,             WORD },
+       { "btr",        BTR,            WORD },
+       { "bts",        BTS,            WORD },
+       { "btw",        BT,             OWORD },
+       { "call",       CALL,           JUMP },
+       { "callf",      CALLF,          JUMP },
+       { "cbtw",       CBW,            OWORD },
+       { "cbw",        CBW,            WORD },
+       { "cdq",        CWD,            WORD },
+       { "clc",        CLC,            WORD },
+       { "cld",        CLD,            WORD },
+       { "cli",        CLI,            WORD },
+       { "cltd",       CWD,            WORD },
+       { "clts",       CLTS,           WORD },
+       { "cmc",        CMC,            WORD },
+       { "cmpb",       CMP,            BYTE },
+       { "cmpl",       CMP,            WORD },
+       { "cmps",       CMPS,           WORD },
+       { "cmpsb",      CMPS,           BYTE },
+       { "cmpsl",      CMPS,           OWORD },
+       { "cmpsw",      CMPS,           WORD },
+       { "cmpw",       CMP,            OWORD },
+       { "cmpxchg",    CMPXCHG,        WORD },
+       { "cpuid",      CPUID,          WORD },
+       { "cwd",        CWD,            WORD },
+       { "cwde",       CBW,            WORD },
+       { "cwtd",       CWD,            OWORD },
+       { "cwtl",       CBW,            WORD },
+       { "daa",        DAA,            WORD },
+       { "das",        DAS,            WORD },
+       { "decb",       DEC,            BYTE },
+       { "decl",       DEC,            WORD },
+       { "decw",       DEC,            OWORD },
+       { "divb",       DIV,            BYTE },
+       { "divl",       DIV,            WORD },
+       { "divw",       DIV,            OWORD },
+       { "enter",      ENTER,          WORD },
+       { "f2xm1",      F2XM1,          WORD },
+       { "fabs",       FABS,           WORD },
+       { "fadd",       FADD,           WORD },
+       { "faddd",      FADDD,          WORD },
+       { "faddp",      FADDP,          WORD },
+       { "fadds",      FADDS,          WORD },
+       { "fbld",       FBLD,           WORD },
+       { "fbstp",      FBSTP,          WORD },
+       { "fchs",       FCHS,           WORD },
+       { "fcomd",      FCOMD,          WORD },
+       { "fcompd",     FCOMPD,         WORD },
+       { "fcompp",     FCOMPP,         WORD },
+       { "fcomps",     FCOMPS,         WORD },
+       { "fcoms",      FCOMS,          WORD },
+       { "fcos",       FCOS,           WORD },
+       { "fdecstp",    FDECSTP,        WORD },
+       { "fdivd",      FDIVD,          WORD },
+       { "fdivp",      FDIVP,          WORD },
+       { "fdivrd",     FDIVRD,         WORD },
+       { "fdivrp",     FDIVRP,         WORD },
+       { "fdivrs",     FDIVRS,         WORD },
+       { "fdivs",      FDIVS,          WORD },
+       { "ffree",      FFREE,          WORD },
+       { "fiaddl",     FIADDL,         WORD },
+       { "fiadds",     FIADDS,         WORD },
+       { "ficom",      FICOM,          WORD },
+       { "ficomp",     FICOMP,         WORD },
+       { "fidivl",     FIDIVL,         WORD },
+       { "fidivrl",    FIDIVRL,        WORD },
+       { "fidivrs",    FIDIVRS,        WORD },
+       { "fidivs",     FIDIVS,         WORD },
+       { "fildl",      FILDL,          WORD },
+       { "fildq",      FILDQ,          WORD },
+       { "filds",      FILDS,          WORD },
+       { "fimull",     FIMULL,         WORD },
+       { "fimuls",     FIMULS,         WORD },
+       { "fincstp",    FINCSTP,        WORD },
+       { "fistl",      FISTL,          WORD },
+       { "fistp",      FISTP,          WORD },
+       { "fists",      FISTS,          WORD },
+       { "fisubl",     FISUBL,         WORD },
+       { "fisubrl",    FISUBRL,        WORD },
+       { "fisubrs",    FISUBRS,        WORD },
+       { "fisubs",     FISUBS,         WORD },
+       { "fld1",       FLD1,           WORD },
+       { "fldcw",      FLDCW,          WORD },
+       { "fldd",       FLDD,           WORD },
+       { "fldenv",     FLDENV,         WORD },
+       { "fldl2e",     FLDL2E,         WORD },
+       { "fldl2t",     FLDL2T,         WORD },
+       { "fldlg2",     FLDLG2,         WORD },
+       { "fldln2",     FLDLN2,         WORD },
+       { "fldpi",      FLDPI,          WORD },
+       { "flds",       FLDS,           WORD },
+       { "fldx",       FLDX,           WORD },
+       { "fldz",       FLDZ,           WORD },
+       { "fmuld",      FMULD,          WORD },
+       { "fmulp",      FMULP,          WORD },
+       { "fmuls",      FMULS,          WORD },
+       { "fnclex",     FCLEX,          WORD },
+       { "fninit",     FINIT,          WORD },
+       { "fnop",       FNOP,           WORD },
+       { "fnsave",     FSAVE,          WORD },
+       { "fnstcw",     FSTCW,          WORD },
+       { "fnstenv",    FSTENV,         WORD },
+       { "fpatan",     FPATAN,         WORD },
+       { "fprem",      FPREM,          WORD },
+       { "fprem1",     FPREM1,         WORD },
+       { "fptan",      FPTAN,          WORD },
+       { "frndint",    FRNDINT,        WORD },
+       { "frstor",     FRSTOR,         WORD },
+       { "fscale",     FSCALE,         WORD },
+       { "fsin",       FSIN,           WORD },
+       { "fsincos",    FSINCOS,        WORD },
+       { "fsqrt",      FSQRT,          WORD },
+       { "fstd",       FSTD,           WORD },
+       { "fstpd",      FSTPD,          WORD },
+       { "fstps",      FSTPS,          WORD },
+       { "fstpx",      FSTPX,          WORD },
+       { "fsts",       FSTS,           WORD },
+       { "fstsw",      FSTSW,          WORD },
+       { "fsubd",      FSUBD,          WORD },
+       { "fsubp",      FSUBP,          WORD },
+       { "fsubpr",     FSUBPR,         WORD },
+       { "fsubrd",     FSUBRD,         WORD },
+       { "fsubrs",     FSUBRS,         WORD },
+       { "fsubs",      FSUBS,          WORD },
+       { "ftst",       FTST,           WORD },
+       { "fucom",      FUCOM,          WORD },
+       { "fucomp",     FUCOMP,         WORD },
+       { "fucompp",    FUCOMPP,        WORD },
+       { "fxam",       FXAM,           WORD },
+       { "fxch",       FXCH,           WORD },
+       { "fxtract",    FXTRACT,        WORD },
+       { "fyl2x",      FYL2X,          WORD },
+       { "fyl2xp1",    FYL2XP1,        WORD },
+       { "hlt",        HLT,            WORD },
+       { "idivb",      IDIV,           BYTE },
+       { "idivl",      IDIV,           WORD },
+       { "idivw",      IDIV,           OWORD },
+       { "imulb",      IMUL,           BYTE },
+       { "imull",      IMUL,           WORD },
+       { "imulw",      IMUL,           OWORD },
+       { "inb",        IN,             BYTE },
+       { "incb",       INC,            BYTE },
+       { "incl",       INC,            WORD },
+       { "incw",       INC,            OWORD },
+       { "inl",        IN,             WORD },
+       { "insb",       INS,            BYTE },
+       { "insl",       INS,            WORD },
+       { "insw",       INS,            OWORD },
+       { "int",        INT,            WORD },
+       { "into",       INTO,           JUMP },
+       { "invd",       INVD,           WORD },
+       { "invlpg",     INVLPG,         WORD },
+       { "inw",        IN,             OWORD },
+       { "iret",       IRET,           JUMP },
+       { "iretd",      IRETD,          JUMP },
+       { "ja",         JA,             JUMP },
+       { "jae",        JAE,            JUMP },
+       { "jb",         JB,             JUMP },
+       { "jbe",        JBE,            JUMP },
+       { "jc",         JB,             JUMP },
+       { "jcxz",       JCXZ,           JUMP },
+       { "je",         JE,             JUMP },
+       { "jecxz",      JCXZ,           JUMP },
+       { "jg",         JG,             JUMP },
+       { "jge",        JGE,            JUMP },
+       { "jl",         JL,             JUMP },
+       { "jle",        JLE,            JUMP },
+       { "jmp",        JMP,            JUMP },
+       { "jmpf",       JMPF,           JUMP },
+       { "jna",        JBE,            JUMP },
+       { "jnae",       JB,             JUMP },
+       { "jnb",        JAE,            JUMP },
+       { "jnbe",       JA,             JUMP },
+       { "jnc",        JAE,            JUMP },
+       { "jne",        JNE,            JUMP },
+       { "jng",        JLE,            JUMP },
+       { "jnge",       JL,             JUMP },
+       { "jnl",        JGE,            JUMP },
+       { "jnle",       JG,             JUMP },
+       { "jno",        JNO,            JUMP },
+       { "jnp",        JNP,            JUMP },
+       { "jns",        JNS,            JUMP },
+       { "jnz",        JNE,            JUMP },
+       { "jo",         JO,             JUMP },
+       { "jp",         JP,             JUMP },
+       { "js",         JS,             JUMP },
+       { "jz",         JE,             JUMP },
+       { "lahf",       LAHF,           WORD },
+       { "lar",        LAR,            WORD },
+       { "lds",        LDS,            WORD },
+       { "leal",       LEA,            WORD },
+       { "leave",      LEAVE,          WORD },
+       { "leaw",       LEA,            OWORD },
+       { "les",        LES,            WORD },
+       { "lfs",        LFS,            WORD },
+       { "lgdt",       LGDT,           WORD },
+       { "lgs",        LGS,            WORD },
+       { "lidt",       LIDT,           WORD },
+       { "ljmp",       JMPF,           JUMP },
+       { "ljmpw",      JMPF,           JUMP16 },
+       { "lldt",       LLDT,           WORD },
+       { "lmsw",       LMSW,           WORD },
+       { "lock",       LOCK,           WORD },
+       { "lods",       LODS,           WORD },
+       { "lodsb",      LODS,           BYTE },
+       { "loop",       LOOP,           JUMP },
+       { "loope",      LOOPE,          JUMP },
+       { "loopne",     LOOPNE,         JUMP },
+       { "loopnz",     LOOPNE,         JUMP },
+       { "loopz",      LOOPE,          JUMP },
+       { "lretw",      RETF,           JUMP16 },
+       { "lsl",        LSL,            WORD },
+       { "lss",        LSS,            WORD },
+       { "ltr",        LTR,            WORD },
+       { "movb",       MOV,            BYTE },
+       { "movl",       MOV,            WORD },
+       { "movsb",      MOVS,           BYTE },
+       { "movsbl",     MOVSXB,         WORD },
+       { "movsbw",     MOVSXB,         OWORD },
+       { "movsl",      MOVS,           WORD },
+       { "movsw",      MOVS,           OWORD },
+       { "movswl",     MOVSX,          WORD },
+       { "movw",       MOV,            OWORD },
+       { "movzbl",     MOVZXB,         WORD },
+       { "movzbw",     MOVZXB,         OWORD },
+       { "movzwl",     MOVZX,          WORD },
+       { "mulb",       MUL,            BYTE },
+       { "mull",       MUL,            WORD },
+       { "mulw",       MUL,            OWORD },
+       { "negb",       NEG,            BYTE },
+       { "negl",       NEG,            WORD },
+       { "negw",       NEG,            OWORD },
+       { "nop",        NOP,            WORD },
+       { "notb",       NOT,            BYTE },
+       { "notl",       NOT,            WORD },
+       { "notw",       NOT,            OWORD },
+       { "orb",        OR,             BYTE },
+       { "orl",        OR,             WORD },
+       { "orw",        OR,             OWORD },
+       { "outb",       OUT,            BYTE },
+       { "outl",       OUT,            WORD },
+       { "outsb",      OUTS,           BYTE },
+       { "outsl",      OUTS,           WORD },
+       { "outsw",      OUTS,           OWORD },
+       { "outw",       OUT,            OWORD },
+       { "pop",        POP,            WORD },
+       { "popa",       POPA,           WORD },
+       { "popal",      POPAD,          WORD },
+       { "popf",       POPF,           WORD },
+       { "popfl",      POPF,           WORD },
+       { "popl",       POP,            WORD },
+       { "popw",       POP,            OWORD },
+       { "push",       PUSH,           WORD },
+       { "pusha",      PUSHA,          WORD },
+       { "pushal",     PUSHAD,         WORD },
+       { "pushf",      PUSHF,          WORD },
+       { "pushl",      PUSH,           WORD },
+       { "pushw",      PUSH,           OWORD },
+       { "rclb",       RCL,            BYTE },
+       { "rcll",       RCL,            WORD },
+       { "rclw",       RCL,            OWORD },
+       { "rcrb",       RCR,            BYTE },
+       { "rcrl",       RCR,            WORD },
+       { "rcrw",       RCR,            OWORD },
+       { "ret",        RET,            JUMP },
+       { "retf",       RETF,           JUMP },
+       { "rolb",       ROL,            BYTE },
+       { "roll",       ROL,            WORD },
+       { "rolw",       ROL,            OWORD },
+       { "rorb",       ROR,            BYTE },
+       { "rorl",       ROR,            WORD },
+       { "rorw",       ROR,            OWORD },
+       { "sahf",       SAHF,           WORD },
+       { "salb",       SAL,            BYTE },
+       { "sall",       SAL,            WORD },
+       { "salw",       SAL,            OWORD },
+       { "sarb",       SAR,            BYTE },
+       { "sarl",       SAR,            WORD },
+       { "sarw",       SAR,            OWORD },
+       { "sbbb",       SBB,            BYTE },
+       { "sbbl",       SBB,            WORD },
+       { "sbbw",       SBB,            OWORD },
+       { "scasb",      SCAS,           BYTE },
+       { "scasl",      SCAS,           WORD },
+       { "scasw",      SCAS,           OWORD },
+       { "seta",       SETA,           BYTE },
+       { "setae",      SETAE,          BYTE },
+       { "setb",       SETB,           BYTE },
+       { "setbe",      SETBE,          BYTE },
+       { "sete",       SETE,           BYTE },
+       { "setg",       SETG,           BYTE },
+       { "setge",      SETGE,          BYTE },
+       { "setl",       SETL,           BYTE },
+       { "setna",      SETBE,          BYTE },
+       { "setnae",     SETB,           BYTE },
+       { "setnb",      SETAE,          BYTE },
+       { "setnbe",     SETA,           BYTE },
+       { "setne",      SETNE,          BYTE },
+       { "setng",      SETLE,          BYTE },
+       { "setnge",     SETL,           BYTE },
+       { "setnl",      SETGE,          BYTE },
+       { "setnle",     SETG,           BYTE },
+       { "setno",      SETNO,          BYTE },
+       { "setnp",      SETNP,          BYTE },
+       { "setns",      SETNS,          BYTE },
+       { "seto",       SETO,           BYTE },
+       { "setp",       SETP,           BYTE },
+       { "sets",       SETS,           BYTE },
+       { "setz",       SETE,           BYTE },
+       { "sgdt",       SGDT,           WORD },
+       { "shlb",       SHL,            BYTE },
+       { "shldl",      SHLD,           WORD },
+       { "shll",       SHL,            WORD },
+       { "shlw",       SHL,            OWORD },
+       { "shrb",       SHR,            BYTE },
+       { "shrdl",      SHRD,           WORD },
+       { "shrl",       SHR,            WORD },
+       { "shrw",       SHR,            OWORD },
+       { "sidt",       SIDT,           WORD },
+       { "sldt",       SLDT,           WORD },
+       { "smsw",       SMSW,           WORD },
+       { "stc",        STC,            WORD },
+       { "std",        STD,            WORD },
+       { "sti",        STI,            WORD },
+       { "stosb",      STOS,           BYTE },
+       { "stosl",      STOS,           WORD },
+       { "stosw",      STOS,           OWORD },
+       { "str",        STR,            WORD },
+       { "subb",       SUB,            BYTE },
+       { "subl",       SUB,            WORD },
+       { "subw",       SUB,            OWORD },
+       { "testb",      TEST,           BYTE },
+       { "testl",      TEST,           WORD },
+       { "testw",      TEST,           OWORD },
+       { "verr",       VERR,           WORD },
+       { "verw",       VERW,           WORD },
+       { "wait",       WAIT,           WORD },
+       { "wbinvd",     WBINVD,         WORD },
+       { "xadd",       XADD,           WORD },
+       { "xchgb",      XCHG,           BYTE },
+       { "xchgl",      XCHG,           WORD },
+       { "xchgw",      XCHG,           OWORD },
+       { "xlat",       XLAT,           WORD },
+       { "xorb",       XOR,            BYTE },
+       { "xorl",       XOR,            WORD },
+       { "xorw",       XOR,            OWORD },
+};
+
+void gnu_parse_init(char *file)
+/* Prepare parsing of a GNU assembly file: start the tokenizer on 'file'
+ * with '#' passed as its comment character.
+ */
+{
+       tok_init(file, '#');
+}
+
+static void zap(void)
+/* Error recovery: discard tokens up to, but not including, the next ';',
+ * comment token, or end of file.
+ */
+{
+       for (;;) {
+               token_t *tok= get_token(0);
+
+               if (tok->type == T_EOF) break;
+               if (tok->symbol == ';') break;
+               if (tok->type == T_COMMENT) break;
+               skip_token(1);
+       }
+}
+
+/* same as in ACK */
+static int zap_unknown(asm86_t *a)
+/* Collect the text of the rest of the line (up to, but not including, the
+ * next ';', comment token, or end of file) into a freshly malloc'ed string
+ * stored in a->raw_string, prefixed with a tab.  The tokens are consumed.
+ * Only T_CHAR, T_WORD and T_STRING tokens contribute text.
+ *
+ * Returns 0 on success, -1 if the line does not fit in the MAX_ASTR byte
+ * scratch buffer or if memory cannot be allocated.
+ */
+{
+       token_t *t;
+#define MAX_ASTR       4096
+       char astr[MAX_ASTR];
+       size_t astr_len = 0;
+
+       astr[astr_len++] = '\t';
+       while ((t= get_token(0))->type != T_EOF && t->symbol != ';'
+                       && t->type != T_COMMENT) {
+               switch (t->type) {
+               case T_CHAR:
+                       /* Always keep one byte free for the final NUL. */
+                       if (astr_len + 1 >= MAX_ASTR)
+                               return -1;
+                       astr[astr_len++] = t->symbol;
+                       break;
+               case T_WORD:
+               case T_STRING:
+                       /* astr_len < MAX_ASTR holds here, so no wrap-around. */
+                       if (t->len >= MAX_ASTR - astr_len)
+                               return -1;
+                       memcpy(astr + astr_len, t->name, t->len);
+                       astr_len += t->len;
+                       break;
+               default:
+                       break;
+               }
+               skip_token(1);
+       }
+       astr[astr_len++] = '\0';
+
+       a->raw_string = malloc(astr_len);
+       if (!a->raw_string)
+               return -1;
+
+       strcpy(a->raw_string, astr);
+
+       return 0;
+}
+
+static mnemonic_t *search_mnem(char *name)
+/* Binary search for a mnemonic.  (That's why the table is sorted.)
+ *
+ * If 'name' is not in mnemtab the search is repeated once with an 'l'
+ * appended, because GNU sources usually omit that size suffix while we
+ * need the operand length.  Returns the table entry, or nil if neither
+ * spelling is known.
+ */
+{
+       int low, mid, high;
+       int cmp;
+       mnemonic_t *m;
+       char name_buf[64];
+       int brk = 0;
+
+       for (;;) {
+               low= 0;
+               high= arraysize(mnemtab)-1;
+               while (low <= high) {
+                       mid= (low + high) / 2;
+                       m= &mnemtab[mid];
+
+                       if ((cmp= strcmp(name, m->name)) == 0) return m;
+
+                       if (cmp < 0) high= mid-1; else low= mid+1;
+               }
+
+               /* Second attempt (with the 'l' suffix) failed as well. */
+               if (brk) return nil;
+
+               /*
+                * in gnu the modifier 'l' is usually omitted, however we need
+                * the information about the arguments length. Therefore we
+                * try if we know such instruction. It covers most of the
+                * cases of unknown instructions
+                */
+               {
+                       size_t len = strlen(name);
+
+                       /* Unknown words (e.g. macro names) of any length end
+                        * up here; one that does not fit with the suffix and
+                        * the NUL is treated as unknown instead of
+                        * overflowing name_buf.
+                        */
+                       if (len + 2 > sizeof(name_buf)) return nil;
+
+                       memcpy(name_buf, name, len);
+                       name_buf[len] = 'l';
+                       name_buf[len + 1] = '\0';
+                       name = name_buf;
+                       brk = 1;
+               }
+       }
+}
+
+static expression_t *gnu_get_C_expression(int *pn)
+/* Parse a "C-like" expression starting at token index *pn and advance *pn
+ * past it.  Operator precedence is deliberately ignored: the expression is
+ * later printed in the order it was read, so a target language lacking an
+ * operator (like ~) has to be handled by changing the source file.
+ * Returns the expression tree, or nil after reporting a syntax error.
+ */
+{
+       expression_t *expr, *lhs, *rhs;
+       token_t *tok= get_token(*pn);
+
+       if (tok->symbol == '(') {
+               /* Parenthesized group, stored as a '[' node. */
+               (*pn)++;
+               lhs= gnu_get_C_expression(pn);
+               if (lhs == nil) return nil;
+               if (get_token(*pn)->symbol != ')') {
+                       parse_err(1, tok, "missing )\n");
+                       del_expr(lhs);
+                       return nil;
+               }
+               (*pn)++;
+               expr= new_expr();
+               expr->operator= '[';
+               expr->middle= lhs;
+       } else if (tok->type == T_WORD || tok->type == T_STRING) {
+               /* Leaf: a word ('W') or a string ('S'), text copied. */
+               expr= new_expr();
+               if (tok->type == T_WORD)
+                       expr->operator= 'W';
+               else
+                       expr->operator= 'S';
+               expr->name= allocate(nil,
+                               (tok->len+1) * sizeof(expr->name[0]));
+               memcpy(expr->name, tok->name, tok->len+1);
+               expr->len= tok->len;
+               (*pn)++;
+       } else if (tok->symbol == '+' || tok->symbol == '-'
+                                               || tok->symbol == '~') {
+               /* Prefix operator applied to whatever follows. */
+               (*pn)++;
+               lhs= gnu_get_C_expression(pn);
+               if (lhs == nil) return nil;
+               expr= new_expr();
+               expr->operator= tok->symbol;
+               expr->middle= lhs;
+       } else {
+               parse_err(1, tok, "expression syntax error\n");
+               return nil;
+       }
+
+       /* Anything other than a binary operator ends the expression. */
+       tok= get_token(*pn);
+       switch (tok->symbol) {
+       case '%':
+       case '+':
+       case '-':
+       case '*':
+       case '/':
+       case '&':
+       case '|':
+       case '^':
+       case S_LEFTSHIFT:
+       case S_RIGHTSHIFT:
+               break;
+       default:
+               return expr;
+       }
+
+       /* Binary operator: what was read so far becomes the left operand. */
+       (*pn)++;
+       lhs= expr;
+       rhs= gnu_get_C_expression(pn);
+       if (rhs == nil) {
+               del_expr(lhs);
+               return nil;
+       }
+       expr= new_expr();
+       expr->operator= tok->symbol;
+       expr->left= lhs;
+       expr->right= rhs;
+       return expr;
+}
+
+static expression_t *gnu_get_operand(asm86_t * a, int *pn, int deref)
+/* Get something like: $immed, memory, offset(%base,%index,scale), or simpler.
+ *
+ * '*pn' is the index of the operand's first token and is advanced past the
+ * tokens consumed.  A nonzero 'deref', or a leading '*' on the operand,
+ * makes a lone offset come back wrapped in a '(' expression.  'a' is not
+ * used by this function.  Returns the operand expression, or nil after a
+ * syntax error has been reported.
+ */
+{
+       expression_t *e, *offset, *base, *index;
+       token_t *t;
+
+       if (get_token(*pn)->symbol == '$') {
+               /* An immediate value. */
+               (*pn)++;
+               return gnu_get_C_expression(pn);
+       }
+
+       if (get_token(*pn)->symbol == '*') {
+               /* Skip the '*' and treat what follows as dereferenced. */
+               (*pn)++;
+               deref = 1;
+       }
+
+       if ((get_token(*pn)->symbol == '%')
+               && (t= get_token(*pn + 1))->type == T_WORD
+               && isregister(t->name)
+       ) {
+               /* A register operand. */
+               (*pn)+= 2;
+               e= new_expr();
+               e->operator= 'W';
+               e->name= copystr(t->name);
+               return e;
+       }
+
+       /* Offset? */
+       if (get_token(*pn)->symbol != '('
+                               || get_token(*pn + 1)->symbol != '%') {
+               /* There is an offset. */
+               if ((offset= gnu_get_C_expression(pn)) == nil) return nil;
+       } else {
+               /* No offset. */
+               offset= nil;
+       }
+
+       /* (%base,%index,scale) ? */
+       base= index= nil;
+       if (get_token(*pn)->symbol == '(') {
+               (*pn)++;
+
+               /* %base ? */
+               if (get_token(*pn)->symbol == '%'
+                       && (t= get_token(*pn + 1))->type == T_WORD
+                       && isregister(t->name)
+               ) {
+                       /* A base register expression. */
+                       base= new_expr();
+                       base->operator= 'B';
+                       base->name= copystr(t->name);
+                       (*pn)+= 2;
+               }
+
+               if (get_token(*pn)->symbol == ',') (*pn)++;
+
+               /* %index ? */
+               if (get_token(*pn)->symbol == '%'
+                       && (t= get_token(*pn + 1))->type == T_WORD
+                       && isregister(t->name)
+               ) {
+                       /* A index register expression. */
+                       index= new_expr();
+                       index->operator= '1';           /* for now */
+                       index->name= copystr(t->name);
+                       (*pn)+= 2;
+               }
+
+               if (get_token(*pn)->symbol == ',') (*pn)++;
+
+               /* scale ? */
+               if ((base != nil || index != nil)
+                       && (t= get_token(*pn))->type == T_WORD
+                       && strchr("1248", t->name[0]) != nil
+                       && t->name[1] == 0
+               ) {
+                       if (index == nil) {
+                               /* Base is really an index register. */
+                               index= base;
+                               base= nil;
+                       }
+                       index->operator= t->name[0];
+                       (*pn)++;
+               }
+
+               if (get_token(*pn)->symbol == ')') {
+                       /* Ending paren. */
+                       (*pn)++;
+               } else {
+                       /* Alas.  Report at the offending token; 't' may
+                        * never have been assigned on this path (e.g. for
+                        * "foo(bar)"), so it must not be used here.
+                        */
+                       parse_err(1, get_token(*pn), "operand syntax error\n");
+                       del_expr(offset);
+                       del_expr(base);
+                       del_expr(index);
+                       return nil;
+               }
+       }
+
+       if (base == nil && index == nil) {
+               if (deref) {
+                       /* Return a lone offset as (offset). */
+                       e= new_expr();
+                       e->operator= '(';
+                       e->middle= offset;
+               } else {
+                       /* Return a lone offset as is. */
+                       e= offset;
+               }
+       } else {
+               /* offset(base,index): an 'O' node holding all three parts. */
+               e= new_expr();
+               e->operator= 'O';
+               e->left= offset;
+
+               e->middle= base;
+               e->right= index;
+       }
+       return e;
+}
+
+static expression_t *gnu_get_oplist(asm86_t * a, int *pn, int deref)
+/* Get a comma (or colon for jmpf and callf) separated list of instruction
+ * operands.
+ *
+ * A segment register followed by ':' is not kept as an operand: it is
+ * recorded in a->seg and parsing continues with what follows.  Otherwise
+ * each ',' or ':' yields a ',' node whose right child is the operand just
+ * read and whose left child is the rest of the list.  '*pn' is advanced
+ * past the tokens consumed.  Returns nil after a syntax error.
+ */
+{
+       expression_t *e, *o1, *o2;
+       token_t *t;
+       int sreg;
+
+       if ((e= gnu_get_operand(a, pn, deref)) == nil) return nil;
+
+       t = get_token(*pn);
+
+       /* Only operands that carry a name can be a register; memory and
+        * grouping expressions are built without one (assumes new_expr()
+        * leaves 'name' nil -- TODO confirm), so do not hand those to
+        * isregister().
+        */
+       if (t->symbol == ':' && e->name != nil
+                       && IS_REGSEG(sreg = isregister(e->name))) {
+               a->seg = segreg2seg(sreg);
+               del_expr(e);
+               (*pn)++;
+               e = gnu_get_oplist(a, pn, deref);
+       }
+       else if (t->symbol == ',' || t->symbol == ':') {
+               o1= e;
+               (*pn)++;
+               if ((o2= gnu_get_oplist(a, pn, deref)) == nil) {
+                       del_expr(o1);
+                       return nil;
+               }
+               e= new_expr();
+               e->operator= ',';
+               e->left= o2;
+               e->right= o1;
+       }
+       return e;
+}
+
+
+static asm86_t *gnu_get_statement(void)
+/* Get a pseudo op or machine instruction with arguments.
+ *
+ * The current token must be a word.  Leading rep/repe/repne/repz/repnz
+ * prefixes are folded into a->rep.  A word that is not a known mnemonic is
+ * assumed to belong to an unexpanded macro and is returned as an UNKNOWN
+ * statement carrying the raw text of the line.  A trailing comment is kept
+ * in a->raw_string.  On success the statement's tokens are consumed.
+ *
+ * Returns nil on error; the caller is expected to skip the rest of the
+ * line.
+ */
+{
+       token_t *t= get_token(0);
+       token_t *tn;
+       asm86_t *a;
+       mnemonic_t *m;
+       int n;
+       int prefix_seen;
+       int deref;
+
+       assert(t->type == T_WORD);
+
+       a= new_asm86();
+
+       /* Process instruction prefixes. */
+       for (prefix_seen= 0;; prefix_seen= 1) {
+               /* After a prefix the next token need not be a word, and
+                * only word tokens are known to carry a name, so check the
+                * type before comparing.
+                */
+               if (t->type == T_WORD
+                       && (strcmp(t->name, "rep") == 0
+                       || strcmp(t->name, "repe") == 0
+                       || strcmp(t->name, "repne") == 0
+                       || strcmp(t->name, "repz") == 0
+                       || strcmp(t->name, "repnz") == 0)
+               ) {
+                       if (a->rep != ONCE) {
+                               parse_err(1, t,
+                                       "can't have more than one rep\n");
+                       }
+                       /* The fourth character tells the variants apart. */
+                       switch (t->name[3]) {
+                       case 0:         a->rep= REP;    break;
+                       case 'e':
+                       case 'z':       a->rep= REPE;   break;
+                       case 'n':       a->rep= REPNE;  break;
+                       }
+               } else
+               if (!prefix_seen) {
+                       /* No prefix here, get out! */
+                       break;
+               } else {
+                       /* No more prefixes, next must be an instruction. */
+                       if (t->type != T_WORD
+                               || (m= search_mnem(t->name)) == nil
+                               || m->optype == PSEUDO
+                       ) {
+                               parse_err(1, t,
+               "machine instruction expected after instruction prefix\n");
+                               del_asm86(a);
+                               return nil;
+                       }
+                       break;
+               }
+
+               /* Skip the prefix and extra newlines. */
+               do {
+                       skip_token(1);
+               } while ((t= get_token(0))->symbol == ';');
+       }
+
+       /* All the readahead being done upsets the line counter. */
+       a->line= t->line;
+
+       /* Read a machine instruction or pseudo op. */
+       if ((m= search_mnem(t->name)) == nil) {
+               /* we assume that unknown stuff is part of unresolved macro */
+               a->opcode = UNKNOWN;
+               if (zap_unknown(a)) {
+                       parse_err(1, t, "unknown instruction '%s'\n", t->name);
+                       del_asm86(a);
+                       return nil;
+               }
+               return a;
+       }
+       a->opcode= m->opcode;
+       a->optype= m->optype;
+       a->oaz= 0;
+       if (a->optype == OWORD) {
+               a->oaz|= OPZ;
+               a->optype= WORD;
+       }
+       else if (a->optype == JUMP16) {
+               a->oaz|= OPZ;
+               a->optype= JUMP;
+       }
+
+       switch (a->opcode) {
+       case IN:
+       case OUT:
+       case INT:
+               deref= 0;
+               break;
+       default:
+               deref= (a->optype >= BYTE);
+       }
+       n= 1;
+       if (get_token(1)->type != T_COMMENT && get_token(1)->symbol != ';'
+                       && (a->args= gnu_get_oplist(a, &n, deref)) == nil) {
+               del_asm86(a);
+               return nil;
+       }
+       tn = get_token(n);
+       if (tn->type == T_COMMENT) {
+               /* Keep the trailing comment with the statement. */
+               a->raw_string = malloc(tn->len + 1);
+               if (!a->raw_string) {
+                       /* Do not drop the statement silently or leak it. */
+                       parse_err(1, t, "out of memory\n");
+                       del_asm86(a);
+                       return nil;
+               }
+
+               strcpy(a->raw_string, tn->name);
+       } else
+       if (tn->symbol != ';') {
+               parse_err(1, t, "garbage at end of instruction\n");
+               del_asm86(a);
+               return nil;
+       }
+       if (!is_pseudo(a->opcode)) {
+               /* GNU operand order is the other way around. */
+               expression_t *e, *tmp;
+
+               e= a->args;
+               while (e != nil && e->operator == ',') {
+                       tmp= e->right; e->right= e->left; e->left= tmp;
+                       e= e->left;
+               }
+       }
+       switch (a->opcode) {
+       case DOT_ALIGN:
+               /* Delete two argument .align, because ACK can't do it.
+                */
+               if (a->args == nil || a->args->operator != 'W') {
+                       del_asm86(a);
+                       return nil;
+               }
+               if (isanumber(a->args->name)) {
+                       /* Rewrite the number in plain decimal. */
+                       unsigned align;
+                       char num[sizeof(int) * CHAR_BIT / 3 + 1];
+
+                       align= strtoul(a->args->name, nil, 0);
+                       sprintf(num, "%u", align);
+                       deallocate(a->args->name);
+                       a->args->name= copystr(num);
+               }
+               break;
+       case DOT_DEFINE:
+       case DOT_EXTERN:
+               syms_add_global_csl(a->args);
+               break;
+       case DOT_COMM:
+       case DOT_LCOMM:
+               /* Both need an operand list with a named left element; a
+                * bare ".comm" or a single operand would otherwise be
+                * dereferenced through nil (assumes new_expr() and
+                * new_asm86() nil-initialize their pointers -- TODO confirm).
+                */
+               if (a->args == nil || a->args->left == nil
+                               || a->args->left->name == nil) {
+                       parse_err(1, t, "symbol name expected\n");
+                       del_asm86(a);
+                       return nil;
+               }
+               if (a->opcode == DOT_COMM)
+                       syms_add_global(a->args->left->name);
+               else
+                       syms_add(a->args->left->name);
+               break;
+       default:
+               /* Nothing special for anything else, jumps and calls
+                * included.
+                */
+               break;
+       }
+       skip_token(n+1);
+       return a;
+}
+
+
+asm86_t *gnu_get_instruction(void)
+/* Return the next statement of the GNU assembly input, or nil at end of
+ * file.
+ *
+ * Handled here: comment and C preprocessor tokens (returned with their
+ * text in raw_string), "# line file" markers (applied with set_file() and
+ * skipped), label definitions, "name = expr" assignments, and everything
+ * else that starts with a word (passed to gnu_get_statement()).  After a
+ * syntax error the rest of the line is skipped and parsing continues with
+ * the next statement.
+ */
+{
+       asm86_t *a= nil;
+       expression_t *e;
+       token_t *t;
+
+       while ((t= get_token(0))->symbol == ';' || t->symbol == '/') {
+               zap();          /* if a comment started by a '/' */
+               skip_token(1);
+       }
+
+       if (t->type == T_EOF) return nil;
+
+       if (t->type == T_COMMENT || t->type == T_C_PREPROCESSOR) {
+               /* Pass the text through untouched. */
+               a = new_asm86();
+               if (t->type == T_COMMENT)
+                       a->opcode = COMMENT;
+               else
+                       a->opcode = C_PREPROCESSOR;
+
+               a->raw_string = malloc(t->len + 1);
+               if (!a->raw_string) {
+                       /* nil also means end of file to the caller, so at
+                        * least record the error, and do not leak 'a'.
+                        */
+                       parse_err(1, t, "out of memory\n");
+                       del_asm86(a);
+                       return nil;
+               }
+
+               strcpy(a->raw_string, t->name);
+               skip_token(1);
+               return a;
+       }
+
+       if (t->symbol == '#') {
+               /* Preprocessor line and file change. */
+
+               if ((t= get_token(1))->type != T_WORD || !isanumber(t->name)
+                       || get_token(2)->type != T_STRING
+               ) {
+                       parse_err(1, t, "file not preprocessed?\n");
+                       zap();
+               } else {
+                       set_file(get_token(2)->name,
+                               strtol(get_token(1)->name, nil, 0) - 1);
+
+                       /* GNU CPP adds extra cruft, simply zap the line. */
+                       zap();
+               }
+               a= gnu_get_instruction();
+       } else
+       if (t->type == T_WORD && get_token(1)->symbol == ':') {
+               /* A label definition. */
+
+               a= new_asm86();
+               a->line= t->line;
+               a->opcode= DOT_LABEL;
+               a->optype= PSEUDO;
+               a->args= e= new_expr();
+               e->operator= ':';
+               e->name= copystr(t->name);
+               syms_add(t->name);
+               skip_token(2);
+       } else
+       if (t->type == T_WORD && get_token(1)->symbol == '=') {
+               /* An assignment: name = expression. */
+               int n= 2;
+
+               if ((e= gnu_get_C_expression(&n)) == nil) {
+                       zap();
+                       a= gnu_get_instruction();
+               } else
+               if (get_token(n)->type != T_COMMENT && get_token(n)->symbol != ';') {
+                       parse_err(1, t, "garbage after assignment\n");
+                       del_expr(e);    /* parsed but never attached */
+                       zap();
+                       a= gnu_get_instruction();
+               } else {
+                       a= new_asm86();
+                       if (get_token(n)->type == T_COMMENT) {
+                               /* Keep the trailing comment. */
+                               token_t *c = get_token(n);
+
+                               a->raw_string = malloc(c->len + 1);
+                               if (!a->raw_string) {
+                                       /* 'e' is not owned by 'a' yet. */
+                                       parse_err(1, t, "out of memory\n");
+                                       del_expr(e);
+                                       del_asm86(a);
+                                       return nil;
+                               }
+
+                               strcpy(a->raw_string, c->name);
+                       }
+                       a->line= t->line;
+                       a->opcode= DOT_EQU;
+                       a->optype= PSEUDO;
+                       a->args= new_expr();
+                       a->args->operator= '=';
+                       a->args->name= copystr(t->name);
+                       syms_add(t->name);
+                       a->args->middle= e;
+                       skip_token(n+1);
+               }
+       } else
+       if (t->type == T_WORD) {
+               if ((a= gnu_get_statement()) == nil) {
+                       zap();
+                       a= gnu_get_instruction();
+               }
+       } else {
+               parse_err(1, t, "syntax error\n");
+               zap();
+               a= gnu_get_instruction();
+       }
+       return a;
+}
diff --git a/commands/i386/gas2ack/token.h b/commands/i386/gas2ack/token.h
new file mode 100644 (file)
index 0000000..fbf8ae6
--- /dev/null
@@ -0,0 +1,32 @@
+/*     token.h - token definition                      Author: Kees J. Bot
+ *                                                             13 Dec 1993
+ */
+
+/* Token kinds, stored in the 'type' field of a token_t. */
+typedef enum toktype {
+       T_EOF,                  /* End of input. */
+       T_CHAR,                 /* Single character, see the 'symbol' field. */
+       T_WORD,                 /* Identifier, number, label or mnemonic. */
+       T_STRING,               /* Quoted string. */
+       T_COMMENT,              /* Comment, text kept in 'name'. */
+       T_C_PREPROCESSOR        /* Presumably a C preprocessor line -- confirm. */
+} toktype_t;
+
+/* One token of the input, as handed out by get_token(). */
+typedef struct token {
+       struct token    *next;          /* Next token, nil for a new token. */
+       long            line;           /* Line counter when it was created. */
+       toktype_t       type;           /* Kind of token. */
+       int             symbol;         /* Single character symbol. */
+       char            *name;          /* Word, number, etc. */
+       size_t          len;            /* Length of string. */
+} token_t;
+
+#define S_LEFTSHIFT    0x100           /* << */
+#define S_RIGHTSHIFT   0x101           /* >> */
+
+void set_file(char *file, long line);
+void get_file(char **file, long *line);
+void parse_err(int err, token_t *where, const char *fmt, ...);
+void parse_warn(int err, token_t *t, const char *fmt, ...);
+void tok_init(char *file, int comment);
+token_t *get_token(int n);
+void skip_token(int n);
diff --git a/commands/i386/gas2ack/tokenize.c b/commands/i386/gas2ack/tokenize.c
new file mode 100644 (file)
index 0000000..51edbdf
--- /dev/null
@@ -0,0 +1,410 @@
+/*     tokenize.c - split input into tokens            Author: Kees J. Bot
+ *                                                             13 Dec 1993
+ */
+#define nil 0
+#include <stdio.h>
+#include <stdarg.h>
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+#include "asmconv.h"
+#include "token.h"
+
+/* Tokenizer state, private to this file. */
+static FILE *tf;               /* Input stream being tokenized. */
+static char *tfile;            /* File name for messages, see set_file(). */
+static char *orig_tfile;       /* Name of the file opened by tok_init(). */
+static int tcomment;           /* Comment character given to tok_init(). */
+static int tc;                 /* Character last read by readtc(). */
+static long tline;             /* Line counter, see readtc() and set_file(). */
+static token_t *tq;            /* Head of the token queue. */
+static int nl, prevnl;         /* Last char was '\n'; the flag before that. */
+
+static int old_n= 0;           /* To speed up n, n+1, n+2, ... accesses. */
+static token_t **old_ptq= &tq;
+
+static void tok_reset(void)
+/* Put the tokenizer back in its initial state: close the input stream if
+ * one is open and clear the line counter, newline flags and token queue
+ * bookkeeping.
+ */
+{
+       if (tf != NULL) {
+               fclose(tf); /* ignore error */
+               tf = NULL;
+       }
+
+       tq = NULL;
+       old_ptq = &tq;
+       old_n = 0;
+
+       tline = 0;
+       prevnl = 0;
+       nl = 0;
+}
+
+static void readtc(void)
+/* Fetch the next input character into the global 'tc'.  The line counter
+ * is bumped first if the previously read character was a newline, and a
+ * read error is fatal.
+ */
+{
+       if (nl) {
+               tline++;
+       }
+
+       tc = getc(tf);
+       if (tc == EOF && ferror(tf)) {
+               fatal(orig_tfile);
+       }
+
+       /* Remember the old flag so unreadtc() can restore it. */
+       prevnl = nl;
+       nl = (tc == '\n') ? 1 : 0;
+}
+
+static void unreadtc(int tc)
+/* Push character 'tc' back onto the input stream and restore the newline
+ * flag saved by the last readtc().  The parameter shadows the file-level
+ * 'tc', which is left untouched.
+ */
+{
+       /* NOTE(review): readtc() bumps tline when the *previous* character
+        * was a newline, not when it reads one, so this decrement looks
+        * asymmetric -- confirm the line count stays right after a '\n' is
+        * pushed back.
+        */
+       if (tc == '\n')
+               tline--;
+       nl = prevnl;
+       ungetc(tc, tf);
+}
+
+void set_file(char *file, long line)
+/* Replace the current file name by a private copy of 'file' and set the
+ * line counter to 'line'.  Used for preprocessor line/file markers.
+ */
+{
+       size_t size;
+
+       deallocate(tfile);
+
+       size = (strlen(file) + 1) * sizeof(tfile[0]);
+       tfile = allocate(nil, size);
+       strcpy(tfile, file);
+
+       tline = line;
+}
+
+void get_file(char **file, long *line)
+/* Report the current file name and line counter through 'file' and 'line'.
+ * The name is the tokenizer's own string, not a copy.
+ */
+{
+       *line = tline;
+       *file = tfile;
+}
+
+void parse_err(int err, token_t *t, const char *fmt, ...)
+/* Print a printf-style error message on stderr, prefixed with the current
+ * file name and the line of token 't' (or the current line if 't' is nil).
+ * A nonzero 'err' also calls set_error().
+ */
+{
+       va_list ap;
+       long line;
+
+       if (t != nil) {
+               line = t->line;
+       } else {
+               line = tline;
+       }
+
+       fprintf(stderr, "* error : \"%s\", line %ld: ", tfile, line);
+
+       va_start(ap, fmt);
+       vfprintf(stderr, fmt, ap);
+       va_end(ap);
+
+       if (err) {
+               set_error();
+       }
+}
+
+void parse_warn(int err, token_t *t, const char *fmt, ...)
+/* Print a printf-style warning on stderr, prefixed with the current file
+ * name and the line of token 't' (or the current line if 't' is nil).
+ * 'err' is not used.
+ */
+{
+       va_list ap;
+       long line;
+
+       if (t != nil) {
+               line = t->line;
+       } else {
+               line = tline;
+       }
+
+       fprintf(stderr, "warning : \"%s\", line %ld: ", tfile, line);
+
+       va_start(ap, fmt);
+       vfprintf(stderr, fmt, ap);
+       va_end(ap);
+}
+
+void tok_init(char *file, int comment)
+/* Reset the tokenizer and start reading 'file', or standard input (named
+ * "stdin") if 'file' is nil.  Failure to open the file is fatal.  The
+ * first character is read ahead and 'comment' becomes the comment
+ * character.
+ */
+{
+       tok_reset();
+
+       if (file != nil) {
+               tf = fopen(file, "r");
+               if (tf == nil) {
+                       fatal(file);
+               }
+       } else {
+               file = "stdin";
+               tf = stdin;
+       }
+
+       orig_tfile = file;
+       set_file(file, 1);
+       readtc();
+       tcomment = comment;
+}
+
+static int isspace(int c)
+/* White space here is whatever between() accepts for the range '\0' to
+ * ' ', except newline, which is handled separately.
+ */
+{
+       if (!between('\0', c, ' ')) {
+               return 0;
+       }
+       return c != '\n';
+}
+
+#define iscomment(c)   ((c) == tcomment)
+
+static int isidentchar(int c)
+/* True for the characters a word may consist of: letters, digits, '.'
+ * and '_'.
+ */
+{
+       if (between('a', c, 'z')) return 1;
+       if (between('A', c, 'Z')) return 1;
+       if (between('0', c, '9')) return 1;
+       if (c == '.') return 1;
+       if (c == '_') return 1;
+       return 0;
+}
+
+static token_t *new_token(void)
+/* Allocate a token stamped with the current line, with no successor, no
+ * name and symbol -1.  The 'type' and 'len' fields are not set here.
+ */
+{
+       token_t *tok = allocate(nil, sizeof(*tok));
+
+       tok->symbol = -1;
+       tok->name = nil;
+       tok->line = tline;
+       tok->next = nil;
+       return tok;
+}
+
+static token_t *get_word(void)
+/* Collect a run of identifier characters, starting with the current one,
+ * into a T_WORD token: an identifier, a number, a label, or a mnemonic.
+ */
+{
+       token_t *word;
+       char *buf;
+       size_t used = 0;
+       size_t cap = 16;
+
+       buf = allocate(nil, cap * sizeof(buf[0]));
+
+       while (isidentchar(tc)) {
+               buf[used] = tc;
+               used++;
+               readtc();
+               if (used == cap) {
+                       /* Full: double the buffer. */
+                       cap *= 2;
+                       buf = allocate(buf, cap * sizeof(buf[0]));
+               }
+       }
+       buf[used] = 0;
+
+       /* Trim the buffer to the size actually needed. */
+       buf = allocate(buf, (used+1) * sizeof(buf[0]));
+
+       word = new_token();
+       word->type = T_WORD;
+       word->name = buf;
+       word->len = used;
+       return word;
+}
+
+static token_t *get_string(void)
+/* Read a single or double quotes delimited string.
+ *
+ * The current character is the opening quote.  Backslash escapes are
+ * translated: \a \b \f \n \r \t \v, \x followed by up to three hex digits,
+ * and a backslash followed by up to three digits read as an octal number.
+ * Any other escaped character stands for itself.  A string that runs into
+ * a newline or end of file is reported as an error.  Returns a T_STRING
+ * token holding the translated text.
+ */
+{
+       token_t *s;
+       int quote;
+       char *str;
+       size_t i, len;
+       int n, j;
+       int c;                  /* Character to store. */
+       int lookahead;          /* tc already holds the next input char. */
+
+       quote= tc;
+       readtc();
+
+       i= 0;
+       len= 16;
+       str= allocate(nil, len * sizeof(str[0]));
+
+       while (tc != quote && tc != '\n' && tc != EOF) {
+               lookahead= 0;
+               c= tc;
+               if (tc == '\\') {
+                       readtc();
+                       if (tc == '\n' || tc == EOF) break;
+
+                       c= tc;
+                       switch (tc) {
+                       case 'a':       c= '\a'; break;
+                       case 'b':       c= '\b'; break;
+                       case 'f':       c= '\f'; break;
+                       case 'n':       c= '\n'; break;
+                       case 'r':       c= '\r'; break;
+                       case 't':       c= '\t'; break;
+                       case 'v':       c= '\v'; break;
+                       case 'x':
+                               /* Hex digits are 0-9, A-F and a-f; the
+                                * letters count from ten.
+                                */
+                               n= 0;
+                               readtc();
+                               for (j= 0; j < 3; j++) {
+                                       if (between('0', tc, '9'))
+                                               n= n*0x10 + (tc - '0');
+                                       else
+                                       if (between('A', tc, 'F'))
+                                               n= n*0x10 + (tc - 'A' + 0xA);
+                                       else
+                                       if (between('a', tc, 'f'))
+                                               n= n*0x10 + (tc - 'a' + 0xa);
+                                       else
+                                               break;
+                                       readtc();
+                               }
+                               c= n;
+                               lookahead= 1;
+                               break;
+                       default:
+                               if (!between('0', tc, '9')) break;
+                               n= 0;
+                               for (j= 0; j < 3 && between('0', tc, '9');
+                                                                       j++) {
+                                       n= n*010 + (tc - '0');
+                                       readtc();
+                               }
+                               c= n;
+                               /* tc is the character after the digits, also
+                                * when all three digits were used; it must
+                                * not be skipped.
+                                */
+                               lookahead= 1;
+                       }
+               }
+               str[i++]= c;
+               if (i == len) str= allocate(str, (len*= 2) * sizeof(str[0]));
+
+               if (!lookahead) readtc();
+       }
+
+       if (tc == quote) {
+               readtc();
+       } else {
+               parse_err(1, nil, "string contains newline\n");
+       }
+       str[i]= 0;
+       str= allocate(str, (i+1) * sizeof(str[0]));
+       s= new_token();
+       s->type= T_STRING;
+       s->name= str;
+       s->len= i;
+       return s;
+}
+
+#define MAX_TOKEN_STR_SIZE     4096
+/* Scratch buffer in which the text of a comment or preprocessor token is
+ * collected before it is copied to the heap; token_sz counts the bytes used.
+ */
+static char token_str[MAX_TOKEN_STR_SIZE];
+static unsigned token_sz;
+
+static void token_str_put(int c)
+/* Append a character to token_str.  The last byte is reserved for the
+ * terminating null, so text that does not fit is a fatal error rather than
+ * a write beyond the end of the buffer.
+ */
+{
+       if (token_sz >= MAX_TOKEN_STR_SIZE - 1) {
+               fprintf(stderr, "comment or preprocessor line too long\n");
+               exit(-1);
+       }
+       token_str[token_sz++]= (char) c;
+}
+
+static void token_str_save(token_t *t)
+/* Null terminate token_str and give token t a heap copy of it as its text. */
+{
+       token_str[token_sz]= 0;
+       t->name= malloc(token_sz + 1);
+       if (t->name == NULL) {
+               fprintf(stderr, "malloc() failed\n");
+               exit(-1);
+       }
+       strcpy(t->name, token_str);
+       t->len= token_sz;
+}
+
+static token_t *get_c_comment(void)
+/* Read a C comment whose opening slash and star have just been recognized
+ * (tc still holds the star).  The token text includes both delimiters.  A
+ * comment that is never closed simply runs up to EOF.
+ */
+{
+       token_t *t;
+       int closing;
+
+       token_sz= 2;
+       t= new_token();
+       t->type= T_COMMENT;
+       token_str[0]= '/';
+       token_str[1]= '*';
+       readtc();
+
+       while (tc != EOF) {
+               /* A slash closes the comment only if the star in front of it
+                * is not the star of the opening delimiter.
+                */
+               closing= tc == '/' && token_sz > 2
+                               && token_str[token_sz - 1] == '*';
+               token_str_put(tc);
+               readtc();
+               if (closing) break;
+       }
+       token_str_save(t);
+       return t;
+}
+
+static token_t *get_line_token(void)
+/* Read the rest of the line starting at tc as one token: T_COMMENT if tc is
+ * a comment character, T_C_PREPROCESSOR otherwise (the caller has seen '#').
+ * The newline that ends the line is left in tc.  A preprocessor line ending
+ * in a backslash is continued on the next line, newline included.
+ */
+{
+       token_t *t;
+
+       t= new_token();
+       t->type= iscomment(tc) ? T_COMMENT : T_C_PREPROCESSOR;
+       token_sz= 0;
+
+       for (;;) {
+               while (tc != '\n' && tc != EOF) {
+                       token_str_put(tc);
+                       readtc();
+               }
+               if (t->type != T_C_PREPROCESSOR) break;
+               if (token_sz == 0 || token_str[token_sz - 1] != '\\') break;
+
+               /* Continuation line: keep the newline in the token text. */
+               token_str_put('\n');
+               tline++;
+               readtc();
+       }
+       token_str_save(t);
+       return t;
+}
+
+token_t *get_token(int n)
+/* Return the n-th token on the input queue, n = 0 being the head.  Tokens
+ * that are not queued yet are read from the input and appended; nothing is
+ * removed here, see skip_token().  Comments and lines starting with '#' are
+ * returned as tokens of their own with their text in 'name'.
+ */
+{
+       token_t *t, **ptq;
+
+       assert(n >= 0);
+
+       if (0 && n >= old_n) {
+               /* Go forward from the previous point.  (Switched off by the
+                * constant 0, so every lookup restarts at the head.)
+                */
+               n-= old_n;
+               old_n+= n;
+               ptq= old_ptq;
+       } else {
+               /* Restart from the head of the queue. */
+               old_n= n;
+               ptq= &tq;
+       }
+
+       for (;;) {
+               if ((t= *ptq) == nil) {
+                       /* End of the queue: read one more token. */
+                       while (isspace(tc)) readtc();
+
+                       /* A slash followed by a star opens a C comment.  Any
+                        * other slash is handled below like a normal
+                        * character after pushing back what followed it.
+                        */
+                       if (tc == '/') {
+                               readtc();
+                               if (tc == '*') {
+                                       t= get_c_comment();
+                               } else {
+                                       unreadtc(tc);
+                                       tc= '/';
+                               }
+                       }
+
+                       if (t != nil) {
+                               /* The C comment read above is the token. */
+                       } else
+                       if (iscomment(tc) || tc == '#') {
+                               t= get_line_token();
+                       } else
+                       if (tc == EOF) {
+                               t= new_token();
+                               t->type= T_EOF;
+                       } else
+                       if (isidentchar(tc)) {
+                               t= get_word();
+                       } else
+                       if (tc == '\'' || tc == '"') {
+                               t= get_string();
+                       } else {
+                               /* Single character; a newline is reported
+                                * as ';', "<<" and ">>" as one symbol each.
+                                */
+                               if (tc == '\n') tc= ';';
+                               t= new_token();
+                               t->type= T_CHAR;
+                               t->symbol= tc;
+                               readtc();
+                               if (t->symbol == '<' && tc == '<') {
+                                       t->symbol= S_LEFTSHIFT;
+                                       readtc();
+                               } else
+                               if (t->symbol == '>' && tc == '>') {
+                                       t->symbol= S_RIGHTSHIFT;
+                                       readtc();
+                               }
+                       }
+                       *ptq= t;
+               }
+               if (n == 0) break;
+               n--;
+               ptq= &t->next;
+       }
+       old_ptq= ptq;
+       return t;
+}
+
+void skip_token(int n)
+/* Drop the first n tokens of the input queue and release their storage.
+ * Only tokens that are already on the queue may be skipped.
+ */
+{
+       assert(n >= 0);
+
+       for (; n > 0; n--) {
+               token_t *head;
+
+               assert(tq != nil);
+
+               /* Unlink the head of the queue, then free it and its text. */
+               head= tq;
+               tq= head->next;
+               deallocate(head->name);
+               deallocate(head);
+       }
+
+       /* Make the next lookup start from the head of the queue again. */
+       old_ptq= &tq;
+       old_n= 0;
+}