From 41d481b065387cb3cc12170ce29a024d3d63ca3f Mon Sep 17 00:00:00 2001 From: Tomas Hruby Date: Fri, 30 Oct 2009 15:57:35 +0000 Subject: [PATCH] gas2ack - an asmconv based tool for conversion from GNU ia32 assembly to ACK assembly - in contrast to asmconv it is a one way tool only - as the GNU assembly in Minix does not prefix global C symbols with _ gas2ack detects such symbols and prefixes them to be compliant with the ACK convention - gas2ack preserves comments and unexpanded macros - bunch of fixes to the asmconv GNU->ACK direction - support of more instructions that ACK does not know but are in use in Minix - it is meant as a temporary solution as long as ACK will be a supported compiler for the core system --- commands/i386/Makefile | 1 + commands/i386/gas2ack/Makefile | 29 + commands/i386/gas2ack/asm86.c | 87 +++ commands/i386/gas2ack/asm86.h | 272 ++++++++ commands/i386/gas2ack/asmconv.h | 24 + commands/i386/gas2ack/emit_ack.c | 696 ++++++++++++++++++++ commands/i386/gas2ack/gas2ack.c | 116 ++++ commands/i386/gas2ack/globals.c | 134 ++++ commands/i386/gas2ack/globals.h | 9 + commands/i386/gas2ack/languages.h | 25 + commands/i386/gas2ack/parse_gnu.c | 1017 +++++++++++++++++++++++++++++ commands/i386/gas2ack/token.h | 32 + commands/i386/gas2ack/tokenize.c | 410 ++++++++++++ 13 files changed, 2852 insertions(+) create mode 100644 commands/i386/gas2ack/Makefile create mode 100644 commands/i386/gas2ack/asm86.c create mode 100644 commands/i386/gas2ack/asm86.h create mode 100644 commands/i386/gas2ack/asmconv.h create mode 100644 commands/i386/gas2ack/emit_ack.c create mode 100644 commands/i386/gas2ack/gas2ack.c create mode 100644 commands/i386/gas2ack/globals.c create mode 100644 commands/i386/gas2ack/globals.h create mode 100644 commands/i386/gas2ack/languages.h create mode 100644 commands/i386/gas2ack/parse_gnu.c create mode 100644 commands/i386/gas2ack/token.h create mode 100644 commands/i386/gas2ack/tokenize.c diff --git a/commands/i386/Makefile 
b/commands/i386/Makefile index b052d08d5..a391d8869 100755 --- a/commands/i386/Makefile +++ b/commands/i386/Makefile @@ -28,3 +28,4 @@ clean:: all install clean:: cd asmconv && $(MAKE) $@ + cd gas2ack && $(MAKE) $@ diff --git a/commands/i386/gas2ack/Makefile b/commands/i386/gas2ack/Makefile new file mode 100644 index 000000000..3c25c6a9d --- /dev/null +++ b/commands/i386/gas2ack/Makefile @@ -0,0 +1,29 @@ +# Makefile for gas2ack. + +CFLAGS= $(OPT) +LDFLAGS= -i +CC = exec cc + +all: gas2ack + +OBJ= asm86.o gas2ack.o globals.o parse_gnu.o \ + tokenize.o emit_ack.o + +gas2ack: $(OBJ) + $(CC) $(LDFLAGS) -o $@ $(OBJ) + +install: /usr/bin/gas2ack + +/usr/bin/gas2ack: gas2ack + install -cs -o bin gas2ack $@ + +clean: + rm -f $(OBJ) gas2ack core + +# Dependencies. +asm86.o: asm86.h asmconv.h token.h +gas2ack.o: asmconv.h languages.h asm86.h +globals.o: asm86.h +parse_gnu.o: asmconv.h languages.h token.h asm86.h +tokenize.o: asmconv.h token.h +emit_ack.o: asmconv.h languages.h token.h asm86.h diff --git a/commands/i386/gas2ack/asm86.c b/commands/i386/gas2ack/asm86.c new file mode 100644 index 000000000..29c5b7ba9 --- /dev/null +++ b/commands/i386/gas2ack/asm86.c @@ -0,0 +1,87 @@ +/* asm86.c - 80X86 assembly intermediate Author: Kees J. Bot + * 24 Dec 1993 + */ +#define nil 0 +#include +#include +#include +#include "asm86.h" +#include "asmconv.h" +#include "token.h" + +expression_t *new_expr(void) +/* Make a new cell to build an expression. */ +{ + expression_t *e; + + e= allocate(nil, sizeof(*e)); + e->operator= -1; + e->left= e->middle= e->right= nil; + e->name= nil; + e->magic= 31624; + return e; +} + +void del_expr(expression_t *e) +/* Delete an expression tree. */ +{ + if (e != nil) { + assert(e->magic == 31624); + e->magic= 0; + deallocate(e->name); + del_expr(e->left); + del_expr(e->middle); + del_expr(e->right); + deallocate(e); + } +} + +asm86_t *new_asm86(void) +/* Make a new cell to hold an 80X86 instruction. 
*/ +{ + asm86_t *a; + + a= allocate(nil, sizeof(*a)); + a->opcode= -1; + get_file(&a->file, &a->line); + a->optype= NONE; + a->oaz= 0; + a->rep= ONCE; + a->seg= DEFSEG; + a->args= nil; + a->magic= 37937; + a->raw_string = NULL; + return a; +} + +void del_asm86(asm86_t *a) +/* Delete an 80X86 instruction. */ +{ + assert(a != nil); + assert(a->magic == 37937); + a->magic= 0; + del_expr(a->args); + deallocate(a->raw_string); + a->raw_string = NULL; + deallocate(a); +} + +int isregister(const char *name) +/* True if the string is a register name. Return its size. */ +{ + static char *regs[] = { + "al", "bl", "cl", "dl", "ah", "bh", "ch", "dh", + "ax", "bx", "cx", "dx", "si", "di", "bp", "sp", + "eax", "ebx", "ecx", "edx", "esi", "edi", "ebp", "esp", + "cs", "ds", "es", "fs", "gs", "ss", + "cr0", "cr1", "cr2", "cr3", "cr4", + "st", + }; + int reg; + + for (reg= 0; reg < arraysize(regs); reg++) { + if (strcmp(name, regs[reg]) == 0) + return reg+1; + } + return 0; +} diff --git a/commands/i386/gas2ack/asm86.h b/commands/i386/gas2ack/asm86.h new file mode 100644 index 000000000..aae77bdbb --- /dev/null +++ b/commands/i386/gas2ack/asm86.h @@ -0,0 +1,272 @@ +/* asm86.h - 80X86 assembly intermediate Author: Kees J. Bot + * 27 Jun 1993 + */ + +typedef enum opcode { /* 80486 opcodes, from the i486 reference manual. + * Synonyms left out, some new words invented. 
+ */ + DOT_ALIGN, + DOT_ASCII, DOT_ASCIZ, + DOT_ASSERT, /* Pseudo's invented */ + DOT_BASE, + DOT_COMM, DOT_LCOMM, + DOT_DATA1, + DOT_DATA2, + DOT_DATA4, + DOT_DEFINE, DOT_EXTERN, + DOT_EQU, + DOT_FILE, DOT_LINE, + DOT_LABEL, + DOT_LIST, DOT_NOLIST, + DOT_SPACE, + DOT_SYMB, + DOT_TEXT, DOT_ROM, DOT_DATA, DOT_BSS, DOT_END, + DOT_USE16, DOT_USE32, + AAA, + AAD, + AAM, + AAS, + ADC, + ADD, + AND, + ARPL, + BOUND, + BSF, + BSR, + BSWAP, + BT, + BTC, + BTR, + BTS, + CALL, CALLF, /* CALLF added */ + CBW, + CLC, + CLD, + CLI, + CLTS, + CMC, + CMP, + CMPS, + CMPXCHG, + CPUID, + CWD, + DAA, + DAS, + DEC, + DIV, + ENTER, + F2XM1, + FABS, + FADD, FADDD, FADDS, FADDP, FIADDL, FIADDS, + FBLD, + FBSTP, + FCHS, + FCLEX, + FCOMD, FCOMS, FCOMPD, FCOMPS, FCOMPP, + FCOS, + FDECSTP, + FDIVD, FDIVS, FDIVP, FIDIVL, FIDIVS, + FDIVRD, FDIVRS, FDIVRP, FIDIVRL, FIDIVRS, + FFREE, + FICOM, FICOMP, + FILDQ, FILDL, FILDS, + FINCSTP, + FINIT, + FISTL, FISTS, FISTP, + FLDX, FLDD, FLDS, + FLD1, FLDL2T, FLDL2E, FLDPI, FLDLG2, FLDLN2, FLDZ, + FLDCW, + FLDENV, + FMULD, FMULS, FMULP, FIMULL, FIMULS, + FNOP, + FPATAN, + FPREM, + FPREM1, + FPTAN, + FRNDINT, + FRSTOR, + FSAVE, + FSCALE, + FSIN, + FSINCOS, + FSQRT, + FSTD, FSTS, FSTPX, FSTPD, FSTPS, + FSTCW, + FSTENV, + FSTSW, + FSUBD, FSUBS, FSUBP, FISUBL, FISUBS, + FSUBRD, FSUBRS, FSUBPR, FISUBRL, FISUBRS, + FTST, + FUCOM, FUCOMP, FUCOMPP, + FXAM, + FXCH, + FXTRACT, + FYL2X, + FYL2XP1, + HLT, + IDIV, + IMUL, + IN, + INC, + INS, + INT, INTO, + INVD, + INVLPG, + IRET, IRETD, + JA, JAE, JB, JBE, JCXZ, JE, JG, JGE, JL, + JLE, JNE, JNO, JNP, JNS, JO, JP, JS, + JMP, JMPF, /* JMPF added */ + LAHF, + LAR, + LEA, + LEAVE, + LGDT, LIDT, + LGS, LSS, LDS, LES, LFS, + LLDT, + LMSW, + LOCK, + LODS, + LOOP, LOOPE, LOOPNE, + LSL, + LTR, + MOV, + MOVS, + MOVSX, + MOVSXB, + MOVZX, + MOVZXB, + MUL, + NEG, + NOP, + NOT, + OR, + OUT, + OUTS, + POP, + POPA, + POPAD, + POPF, + PUSH, + PUSHA, + PUSHAD, + PUSHF, + RCL, RCR, ROL, ROR, + RET, RETF, /* RETF added */ + SAHF, + SAL, 
SAR, SHL, SHR, + SBB, + SCAS, + SETA, SETAE, SETB, SETBE, SETE, SETG, SETGE, SETL, + SETLE, SETNE, SETNO, SETNP, SETNS, SETO, SETP, SETS, + SGDT, SIDT, + SHLD, + SHRD, + SLDT, + SMSW, + STC, + STD, + STI, + STOS, + STR, + SUB, + TEST, + VERR, VERW, + WAIT, + WBINVD, + XADD, + XCHG, + XLAT, + XOR, + COMMENT, + C_PREPROCESSOR, + UNKNOWN +} opcode_t; + +#define is_pseudo(o) ((o) <= DOT_USE32) +#define N_OPCODES ((int) XOR + 1) + +#define OPZ 0x01 /* Operand size prefix. */ +#define ADZ 0x02 /* Address size prefix. */ + +typedef enum optype { + NONE, PSEUDO, JUMP, JUMP16, BYTE, WORD, OWORD /* Ordered list! */ +} optype_t; + +typedef enum repeat { + ONCE, REP, REPE, REPNE +} repeat_t; + +typedef enum segment { + DEFSEG, CSEG, DSEG, ESEG, FSEG, GSEG, SSEG +} segment_t; + +typedef struct expression { + int operator; + struct expression *left, *middle, *right; + char *name; + size_t len; + unsigned magic; +} expression_t; + +typedef struct asm86 { + opcode_t opcode; /* DOT_TEXT, MOV, ... */ + char *file; /* Name of the file it is found in. */ + long line; /* Line number. */ + optype_t optype; /* Type of operands: byte, word... */ + int oaz; /* Operand/address size prefix? */ + repeat_t rep; /* Repeat prefix used on this instr. */ + segment_t seg; /* Segment override. */ + expression_t *args; /* Arguments in ACK order. */ + unsigned magic; + char * raw_string; /* each instruction can have a comment. + Instruction can be empty if the + comment is the only thing on the + line. Or the instruction can be a + preprocessor macro. 
It may span + multiple lines and does not contain + any instruction + */ +} asm86_t; + +expression_t *new_expr(void); +void del_expr(expression_t *a); +asm86_t *new_asm86(void); +void del_asm86(asm86_t *a); + +int isregister(const char *name); + +#define IS_REG8(n) ((n) >= 1 && (n) <=8) +#define IS_REG16(n) ((n) >= 9 && (n) <=16) +#define IS_REG32(n) ((n) >= 17 && (n) <=24) +#define IS_REGSEG(n) ((n) >= 25 && (n) <=30) +#define IS_REGCR(n) ((n) >= 31 && (n) <=35) +#define segreg2seg(reg) ((segment_t)(reg - 25 + 1)) + +/* + * Format of the arguments of the asm86_t structure: + * + * + * ACK assembly operands expression_t cell: + * or part of operand: {operator, left, middle, right, name, len} + * + * [expr] {'[', nil, expr, nil} + * word {'W', nil, nil, nil, word} + * "string" {'S', nil, nil, nil, "string", strlen("string")} + * label = expr {'=', nil, expr, nil, label} + * expr * expr {'*', expr, nil, expr} + * - expr {'-', nil, expr, nil} + * (memory) {'(', nil, memory, nil} + * offset(base)(index*n) {'O', offset, base, index*n} + * base {'B', nil, nil, nil, base} + * index*4 {'4', nil, nil, nil, index} + * operand, oplist {',', operand, nil, oplist} + * label : {':', nil, nil, nil, label} + * + * The precedence of operators is ignored. The expression is simply copied + * as is, including parentheses. Problems like missing operators in the + * target language will have to be handled by rewriting the source language. + * 16-bit or 32-bit registers must be used where they are required by the + * target assembler even though ACK makes no difference between 'ax' and + * 'eax'. Asmconv is smart enough to transform compiler output. Human made + * assembly can be fixed up to be transformable. + */ diff --git a/commands/i386/gas2ack/asmconv.h b/commands/i386/gas2ack/asmconv.h new file mode 100644 index 000000000..4ef2eaae2 --- /dev/null +++ b/commands/i386/gas2ack/asmconv.h @@ -0,0 +1,24 @@ +/* asmconv.h - shared functions Author: Kees J. 
Bot + * 19 Dec 1993 + */ + +#define arraysize(a) (sizeof(a)/sizeof((a)[0])) +#define arraylimit(a) ((a) + arraysize(a)) +#define between(a, c, z) \ + ((unsigned)((c) - (a)) <= (unsigned)((z) - (a))) + +void *allocate(void *mem, size_t size); +void deallocate(void *mem); +void fatal(char *label); +char *copystr(const char *s); +int isanumber(const char *s); + +extern int asm_mode32; /* In 32 bit mode if true. */ + +#define use16() (!asm_mode32) +#define use32() ((int) asm_mode32) +#define set_use16() ((void) (asm_mode32= 0)) +#define set_use32() ((void) (asm_mode32= 1)) + +extern int err_code; /* Exit code. */ +#define set_error() ((void) (err_code= EXIT_FAILURE)) diff --git a/commands/i386/gas2ack/emit_ack.c b/commands/i386/gas2ack/emit_ack.c new file mode 100644 index 000000000..5a956e996 --- /dev/null +++ b/commands/i386/gas2ack/emit_ack.c @@ -0,0 +1,696 @@ +/* emit_ack.c - emit ACK assembly Author: Kees J. Bot + * emit NCC assembly 27 Dec 1993 + */ +#define nil 0 +#include +#include +#include +#include +#include +#include "asmconv.h" +#include "token.h" +#include "asm86.h" +#include "languages.h" +#include "globals.h" + +typedef struct mnemonic { /* ACK as86 mnemonics translation table. 
*/ + opcode_t opcode; + char *name; +} mnemonic_t; + +static mnemonic_t mnemtab[] = { + { AAA, "aaa" }, + { AAD, "aad" }, + { AAM, "aam" }, + { AAS, "aas" }, + { ADC, "adc%" }, + { ADD, "add%" }, + { AND, "and%" }, + { ARPL, "arpl" }, + { BOUND, "bound" }, + { BSF, "bsf" }, + { BSR, "bsr" }, + { BSWAP, "bswap" }, + { BT, "bt" }, + { BTC, "btc" }, + { BTR, "btr" }, + { BTS, "bts" }, + { CALL, "call" }, + { CALLF, "callf" }, + { CBW, "cbw" }, + { CLC, "clc" }, + { CLD, "cld" }, + { CLI, "cli" }, + { CLTS, "clts" }, + { CMC, "cmc" }, + { CMP, "cmp%" }, + { CMPS, "cmps%" }, + { CMPXCHG, "cmpxchg" }, + { CWD, "cwd" }, + { DAA, "daa" }, + { DAS, "das" }, + { DEC, "dec%" }, + { DIV, "div%" }, + { DOT_ALIGN, ".align" }, + { DOT_ASCII, ".ascii" }, + { DOT_ASCIZ, ".asciz" }, + { DOT_ASSERT, ".assert" }, + { DOT_BASE, ".base" }, + { DOT_BSS, ".sect .bss" }, + { DOT_COMM, ".comm" }, + { DOT_DATA, ".sect .data" }, + { DOT_DATA1, ".data1" }, + { DOT_DATA2, ".data2" }, + { DOT_DATA4, ".data4" }, + { DOT_DEFINE, ".define" }, + { DOT_END, ".sect .end" }, + { DOT_EXTERN, ".extern" }, + { DOT_FILE, ".file" }, + { DOT_LCOMM, ".comm" }, + { DOT_LINE, ".line" }, + { DOT_LIST, ".list" }, + { DOT_NOLIST, ".nolist" }, + { DOT_ROM, ".sect .rom" }, + { DOT_SPACE, ".space" }, + { DOT_SYMB, ".symb" }, + { DOT_TEXT, ".sect .text" }, + { DOT_USE16, ".use16" }, + { DOT_USE32, ".use32" }, + { ENTER, "enter" }, + { F2XM1, "f2xm1" }, + { FABS, "fabs" }, + { FADD, "fadd" }, + { FADDD, "faddd" }, + { FADDP, "faddp" }, + { FADDS, "fadds" }, + { FBLD, "fbld" }, + { FBSTP, "fbstp" }, + { FCHS, "fchs" }, + { FCLEX, "fclex" }, + { FCOMD, "fcomd" }, + { FCOMPD, "fcompd" }, + { FCOMPP, "fcompp" }, + { FCOMPS, "fcomps" }, + { FCOMS, "fcoms" }, + { FCOS, "fcos" }, + { FDECSTP, "fdecstp" }, + { FDIVD, "fdivd" }, + { FDIVP, "fdivp" }, + { FDIVRD, "fdivrd" }, + { FDIVRP, "fdivrp" }, + { FDIVRS, "fdivrs" }, + { FDIVS, "fdivs" }, + { FFREE, "ffree" }, + { FIADDL, "fiaddl" }, + { FIADDS, "fiadds" }, + { FICOM, 
"ficom" }, + { FICOMP, "ficomp" }, + { FIDIVL, "fidivl" }, + { FIDIVRL, "fidivrl" }, + { FIDIVRS, "fidivrs" }, + { FIDIVS, "fidivs" }, + { FILDL, "fildl" }, + { FILDQ, "fildq" }, + { FILDS, "filds" }, + { FIMULL, "fimull" }, + { FIMULS, "fimuls" }, + { FINCSTP, "fincstp" }, + { FINIT, "finit" }, + { FISTL, "fistl" }, + { FISTP, "fistp" }, + { FISTS, "fists" }, + { FISUBL, "fisubl" }, + { FISUBRL, "fisubrl" }, + { FISUBRS, "fisubrs" }, + { FISUBS, "fisubs" }, + { FLD1, "fld1" }, + { FLDCW, "fldcw" }, + { FLDD, "fldd" }, + { FLDENV, "fldenv" }, + { FLDL2E, "fldl2e" }, + { FLDL2T, "fldl2t" }, + { FLDLG2, "fldlg2" }, + { FLDLN2, "fldln2" }, + { FLDPI, "fldpi" }, + { FLDS, "flds" }, + { FLDX, "fldx" }, + { FLDZ, "fldz" }, + { FMULD, "fmuld" }, + { FMULP, "fmulp" }, + { FMULS, "fmuls" }, + { FNOP, "fnop" }, + { FPATAN, "fpatan" }, + { FPREM, "fprem" }, + { FPREM1, "fprem1" }, + { FPTAN, "fptan" }, + { FRNDINT, "frndint" }, + { FRSTOR, "frstor" }, + { FSAVE, "fsave" }, + { FSCALE, "fscale" }, + { FSIN, "fsin" }, + { FSINCOS, "fsincos" }, + { FSQRT, "fsqrt" }, + { FSTCW, "fstcw" }, + { FSTD, "fstd" }, + { FSTENV, "fstenv" }, + { FSTPD, "fstpd" }, + { FSTPS, "fstps" }, + { FSTPX, "fstpx" }, + { FSTS, "fsts" }, + { FSTSW, "fstsw" }, + { FSUBD, "fsubd" }, + { FSUBP, "fsubp" }, + { FSUBPR, "fsubpr" }, + { FSUBRD, "fsubrd" }, + { FSUBRS, "fsubrs" }, + { FSUBS, "fsubs" }, + { FTST, "ftst" }, + { FUCOM, "fucom" }, + { FUCOMP, "fucomp" }, + { FUCOMPP, "fucompp" }, + { FXAM, "fxam" }, + { FXCH, "fxch" }, + { FXTRACT, "fxtract" }, + { FYL2X, "fyl2x" }, + { FYL2XP1, "fyl2xp1" }, + { HLT, "hlt" }, + { IDIV, "idiv%" }, + { IMUL, "imul%" }, + { IN, "in%" }, + { INC, "inc%" }, + { INS, "ins%" }, + { INT, "int" }, + { INTO, "into" }, + { INVD, "invd" }, + { INVLPG, "invlpg" }, + { IRET, "iret" }, + { IRETD, "iretd" }, + { JA, "ja" }, + { JAE, "jae" }, + { JB, "jb" }, + { JBE, "jbe" }, + { JCXZ, "jcxz" }, + { JE, "je" }, + { JG, "jg" }, + { JGE, "jge" }, + { JL, "jl" }, + { JLE, "jle" }, + 
{ JMP, "jmp" }, + { JMPF, "jmpf" }, + { JNE, "jne" }, + { JNO, "jno" }, + { JNP, "jnp" }, + { JNS, "jns" }, + { JO, "jo" }, + { JP, "jp" }, + { JS, "js" }, + { LAHF, "lahf" }, + { LAR, "lar" }, + { LDS, "lds" }, + { LEA, "lea" }, + { LEAVE, "leave" }, + { LES, "les" }, + { LFS, "lfs" }, + { LGDT, "lgdt" }, + { LGS, "lgs" }, + { LIDT, "lidt" }, + { LLDT, "lldt" }, + { LMSW, "lmsw" }, + { LOCK, "lock" }, + { LODS, "lods%" }, + { LOOP, "loop" }, + { LOOPE, "loope" }, + { LOOPNE, "loopne" }, + { LSL, "lsl" }, + { LSS, "lss" }, + { LTR, "ltr" }, + { MOV, "mov%" }, + { MOVS, "movs%" }, + { MOVSX, "movsx" }, + { MOVSXB, "movsxb" }, + { MOVZX, "movzx" }, + { MOVZXB, "movzxb" }, + { MUL, "mul%" }, + { NEG, "neg%" }, + { NOP, "nop" }, + { NOT, "not%" }, + { OR, "or%" }, + { OUT, "out%" }, + { OUTS, "outs%" }, + { POP, "pop" }, + { POPA, "popa" }, + { POPAD, "popad" }, + { POPF, "popf" }, + { PUSH, "push" }, + { PUSHA, "pusha" }, + { PUSHAD, "pushad" }, + { PUSHF, "pushf" }, + { RCL, "rcl%" }, + { RCR, "rcr%" }, + { RET, "ret" }, + { RETF, "retf" }, + { ROL, "rol%" }, + { ROR, "ror%" }, + { SAHF, "sahf" }, + { SAL, "sal%" }, + { SAR, "sar%" }, + { SBB, "sbb%" }, + { SCAS, "scas%" }, + { SETA, "seta" }, + { SETAE, "setae" }, + { SETB, "setb" }, + { SETBE, "setbe" }, + { SETE, "sete" }, + { SETG, "setg" }, + { SETGE, "setge" }, + { SETL, "setl" }, + { SETLE, "setle" }, + { SETNE, "setne" }, + { SETNO, "setno" }, + { SETNP, "setnp" }, + { SETNS, "setns" }, + { SETO, "seto" }, + { SETP, "setp" }, + { SETS, "sets" }, + { SGDT, "sgdt" }, + { SHL, "shl%" }, + { SHLD, "shld" }, + { SHR, "shr%" }, + { SHRD, "shrd" }, + { SIDT, "sidt" }, + { SLDT, "sldt" }, + { SMSW, "smsw" }, + { STC, "stc" }, + { STD, "std" }, + { STI, "sti" }, + { STOS, "stos%" }, + { STR, "str" }, + { SUB, "sub%" }, + { TEST, "test%" }, + { VERR, "verr" }, + { VERW, "verw" }, + { WAIT, "wait" }, + { WBINVD, "wbinvd" }, + { XADD, "xadd" }, + { XCHG, "xchg%" }, + { XLAT, "xlat" }, + { XOR, "xor%" }, +}; + +#define 
farjmp(o)	((o) == JMPF || (o) == CALLF)
+
+static FILE *ef;
+static long eline= 1;
+static char *efile;
+static char *orig_efile;
+static char *opcode2name_tab[N_OPCODES];
+static enum dialect { ACK, NCC } dialect= ACK;
+
+static void ack_putchar(int c)
+/* LOOK, this programmer checks the return code of putc!  What an idiot, no one
+ * does that!
+ */
+{
+	if (putc(c, ef) == EOF) fatal(orig_efile);
+}
+/* printf() to the output file; a failed write is fatal. */
+static void ack_printf(const char *fmt, ...)
+{
+	va_list ap;
+	/* vfprintf() signals failure with any negative value, not just EOF. */
+	va_start(ap, fmt);
+	if (vfprintf(ef, fmt, ap) < 0) fatal(orig_efile);
+	va_end(ap);
+}
+
+void ack_emit_init(char *file, const char *banner)
+/* Prepare producing an ACK assembly file. */
+{
+	mnemonic_t *mp;
+
+	if (file == nil) {
+		file= "stdout";
+		ef= stdout;
+	} else {
+		if ((ef= fopen(file, "w")) == nil) fatal(file);
+	}
+	orig_efile= file;
+	efile= file;
+	ack_printf("! %s", banner);
+	if (dialect == ACK) {
+		/* Declare the four sections used under Minix. */
+		ack_printf(
+		"\n.sect .text; .sect .rom; .sect .data; .sect .bss\n.sect .text");
+	}
+
+	/* Initialize the opcode to mnemonic translation table. */
+	for (mp= mnemtab; mp < arraylimit(mnemtab); mp++) {
+		assert(opcode2name_tab[mp->opcode] == nil);
+		opcode2name_tab[mp->opcode]= mp->name;
+	}
+}
+
+#define opcode2name(op)	(opcode2name_tab[op] + 0)
+
+static void ack_put_string(const char *s, size_t n)
+/* Emit a string with weird characters quoted. */
+{
+	while (n > 0) {
+		int c= *s;
+
+		if (c < ' ' || c > 0177) {
+			ack_printf("\\%03o", c & 0xFF);
+		} else
+		if (c == '"' || c == '\\') {
+			ack_printf("\\%c", c);
+		} else {
+			ack_putchar(c);
+		}
+		s++;
+		n--;
+	}
+}
+
+static void ack_put_expression(asm86_t *a, expression_t *e, int deref)
+/* Send an expression, i.e. instruction operands, to the output file.  Deref
+ * is true when the rewrite for the ncc dialect may be made.
+ */ +{ + int isglob; + + assert(e != nil); + + isglob = syms_is_global(e->name); + + switch (e->operator) { + case ',': + if (dialect == NCC && farjmp(a->opcode)) { + /* ACK jmpf seg:off -> NCC jmpf off,seg */ + ack_put_expression(a, e->right, deref); + ack_printf(", "); + ack_put_expression(a, e->left, deref); + } else { + ack_put_expression(a, e->left, deref); + ack_printf(farjmp(a->opcode) ? ":" : ", "); + ack_put_expression(a, e->right, deref); + } + break; + case 'O': + if (deref && a->optype == JUMP) ack_putchar('@'); + if (e->left != nil) ack_put_expression(a, e->left, 0); + if (e->middle != nil) ack_put_expression(a, e->middle, 0); + if (e->right != nil) ack_put_expression(a, e->right, 0); + break; + case '(': + if (deref && a->optype == JUMP) ack_putchar('@'); + if (!deref) ack_putchar('('); + ack_put_expression(a, e->middle, 0); + if (!deref) ack_putchar(')'); + break; + case 'B': + ack_printf("(%s%s)", isglob ? "_" : "", e->name); + break; + case '1': + case '2': + case '4': + case '8': + ack_printf((use16() && e->operator == '1') + ? "(%s%s)" : "(%s%s*%c)", isglob ? 
"_" : "", + e->name, e->operator); + break; + case '+': + case '-': + case '~': + if (e->middle != nil) { + if (deref && a->optype != JUMP) ack_putchar('#'); + ack_putchar(e->operator); + ack_put_expression(a, e->middle, 0); + break; + } + /*FALL THROUGH*/ + case '*': + case '/': + case '%': + case '&': + case '|': + case '^': + case S_LEFTSHIFT: + case S_RIGHTSHIFT: + if (deref && a->optype != JUMP) ack_putchar('#'); + ack_put_expression(a, e->left, 0); + if (e->operator == S_LEFTSHIFT) { + ack_printf("<<"); + } else + if (e->operator == S_RIGHTSHIFT) { + ack_printf(">>"); + } else { + ack_putchar(e->operator); + } + ack_put_expression(a, e->right, 0); + break; + case '[': + if (deref && a->optype != JUMP) ack_putchar('#'); + ack_putchar('['); + ack_put_expression(a, e->middle, 0); + ack_putchar(']'); + break; + case 'W': + if (deref && a->optype == JUMP && isregister(e->name)) + { + ack_printf("(%s)", e->name); + break; + } + if (deref && a->optype != JUMP && !isregister(e->name)) { + ack_putchar('#'); + } + ack_printf("%s%s", isglob ? "_" : "", e->name); + break; + case 'S': + ack_putchar('"'); + ack_put_string(e->name, e->len); + ack_putchar('"'); + break; + default: + fprintf(stderr, + "asmconv: internal error, unknown expression operator '%d'\n", + e->operator); + exit(EXIT_FAILURE); + } +} + +void ack_emit_instruction(asm86_t *a) +/* Output one instruction and its operands. */ +{ + int same= 0; + char *p; + static int high_seg; + int deref; + static int prevop; + + if (a == nil) { + /* Last call */ + ack_putchar('\n'); + return; + } + + /* Make sure the line number of the line to be emitted is ok. */ + if ((a->file != efile && strcmp(a->file, efile) != 0) + || a->line < eline || a->line > eline+10) { + ack_putchar('\n'); + ack_printf("! 
%ld \"%s\"\n", a->line, a->file); + efile= a->file; + eline= a->line; + } else { + if (a->line == eline) { + if (prevop == DOT_LABEL) { + ack_printf("\n"); + same = 0; + } + else { + ack_printf("; "); + same= 1; + } + } + while (eline < a->line) { + ack_putchar('\n'); + eline++; + } + } + + if (a->opcode == DOT_LABEL) + prevop = DOT_LABEL; + else + prevop = 0; + + if (a->opcode == COMMENT || + a->opcode == C_PREPROCESSOR || + a->opcode == UNKNOWN) { + if (a->opcode == COMMENT) + if (a->raw_string[0] != '/') + a->raw_string[0] = '!'; + ack_printf("%s", a->raw_string); + return; + } else + if (a->opcode == DOT_LABEL) { + assert(a->args->operator == ':'); + if (syms_is_global(a->args->name)) + ack_printf("_%s:", a->args->name); + else + ack_printf("%s:", a->args->name); + } else + if (a->opcode == DOT_EQU) { + assert(a->args->operator == '='); + ack_printf("\t%s = ", a->args->name); + ack_put_expression(a, a->args->middle, 0); + } else + if ((p= opcode2name(a->opcode)) != nil) { + char *sep= dialect == ACK ? "" : ";"; + + if (!is_pseudo(a->opcode) && !same) ack_putchar('\t'); + + switch (a->rep) { + case ONCE: break; + case REP: ack_printf("rep"); break; + case REPE: ack_printf("repe"); break; + case REPNE: ack_printf("repne"); break; + default: assert(0); + } + if (a->rep != ONCE) { + ack_printf(dialect == ACK ? " " : "; "); + } + switch (a->seg) { + case DEFSEG: break; + case CSEG: ack_printf("cseg"); break; + case DSEG: ack_printf("dseg"); break; + case ESEG: ack_printf("eseg"); break; + case FSEG: ack_printf("fseg"); break; + case GSEG: ack_printf("gseg"); break; + case SSEG: ack_printf("sseg"); break; + default: assert(0); + } + if (a->seg != DEFSEG) { + ack_printf(dialect == ACK ? " " : "; "); + } + if (a->oaz & OPZ) ack_printf(use16() ? "o32 " : "o16 "); + if (a->oaz & ADZ) ack_printf(use16() ? "a32 " : "a16 "); + + if (a->opcode == CBW) { + p= !(a->oaz & OPZ) == use16() ? "cbw" : "cwde"; + } + + if (a->opcode == CWD) { + p= !(a->oaz & OPZ) == use16() ? 
"cwd" : "cdq"; + } + + if (a->opcode == DOT_COMM && a->args != nil + && a->args->operator == ',' + && a->args->left->operator == 'W' + ) { + if (syms_is_global(a->args->left->name)) + ack_printf(".define\t_%s; ", a->args->left->name); + else + ack_printf(".define\t%s; ", a->args->left->name); + } +#define IS_OP_CR4(r) ((r) && (r)->name && strcmp((r)->name, "cr4") == 0) + /* unsupported mov to/from cr4 */ + if (a->opcode == MOV && a->args->operator == ',') { + if (IS_OP_CR4(a->args->left)) { + /* read cr4 */ + ack_printf(".data1 0x0f, 0x20, 0xe0\n"); + return; + } + if (IS_OP_CR4(a->args->right)) { + /* write cr4 */ + ack_printf(".data1 0x0f, 0x22, 0xe0\n"); + return; + } + } + /* we are translating from GNU */ + if (a->args && a->args->operator == ',' + /* don't swap ljmp prefixed with segment */ + && a->opcode != JMPF) { + expression_t * tmp; + + tmp = a->args->right; + a->args->right = a->args->left; + a->args->left = tmp; + } + while (*p != 0) { + if (*p == '%') { + if (a->optype == BYTE) ack_putchar('b'); + } else { + ack_putchar(*p); + } + p++; + } + /* + * gnu assembly expresses the ES segment in the long instruction + * format. Not neccessary in ack + */ + if (a->opcode == MOVS) + return; + if (a->args != nil) { + ack_putchar('\t'); + switch (a->opcode) { + case IN: + case OUT: + case INT: + deref= 0; + break; + default: + deref= (dialect == NCC && a->optype != PSEUDO); + } + ack_put_expression(a, a->args, deref); + } + if (a->opcode == DOT_USE16) set_use16(); + if (a->opcode == DOT_USE32) set_use32(); + } else { + fprintf(stderr, + "asmconv: internal error, unknown opcode '%d'\n", + a->opcode); + exit(EXIT_FAILURE); + } + + /* + * comment on the same line as an instruction. Cannot be definition of a + * macro + * */ + if (a->raw_string) { + if (a->raw_string[0] != '/') + a->raw_string[0] = '!'; + ack_printf("\t%s", a->raw_string); + } +} + +/* A few ncc mnemonics are different. 
*/ +static mnemonic_t ncc_mnemtab[] = { + { DOT_BSS, ".bss" }, + { DOT_DATA, ".data" }, + { DOT_END, ".end" }, + { DOT_ROM, ".rom" }, + { DOT_TEXT, ".text" }, +}; + +void ncc_emit_init(char *file, const char *banner) +/* The assembly produced by the Minix ACK ANSI C compiler for the 8086 is + * different from the normal ACK assembly, and different from the old K&R + * assembler. This brings us endless joy. (It was supposed to make + * translation of the assembly used by the old K&R assembler easier by + * not deviating too much from that dialect.) + */ +{ + mnemonic_t *mp; + + dialect= NCC; + ack_emit_init(file, banner); + + /* Replace a few mnemonics. */ + for (mp= ncc_mnemtab; mp < arraylimit(ncc_mnemtab); mp++) { + opcode2name_tab[mp->opcode]= mp->name; + } +} + +void ncc_emit_instruction(asm86_t *a) +{ + ack_emit_instruction(a); +} diff --git a/commands/i386/gas2ack/gas2ack.c b/commands/i386/gas2ack/gas2ack.c new file mode 100644 index 000000000..422e50a66 --- /dev/null +++ b/commands/i386/gas2ack/gas2ack.c @@ -0,0 +1,116 @@ +/* asmconv 1.11 - convert 80X86 assembly Author: Kees J. Bot + * 24 Dec 1993 + */ +static char version[] = "1.11"; + +#define nil 0 +#include +#include +#include +#include +#include +#include +#include "asmconv.h" +#include "asm86.h" +#include "languages.h" + +void fatal(char *label) +{ + fprintf(stderr, "asmconv: %s: %s\n", label, strerror(errno)); + exit(EXIT_FAILURE); +} + +void *allocate(void *mem, size_t size) +/* A checked malloc/realloc(). Yes, I know ISO C allows realloc(NULL, size). */ +{ + mem= mem == nil ? malloc(size) : realloc(mem, size); + if (mem == nil) fatal("malloc()"); + return mem; +} + +void deallocate(void *mem) +/* Free a malloc()d cell. (Yes I know ISO C allows free(NULL) */ +{ + if (mem != nil) free(mem); +} + +char *copystr(const char *s) +{ + char *c; + + c= allocate(nil, (strlen(s) + 1) * sizeof(s[0])); + strcpy(c, s); + return c; +} + +int isanumber(const char *s) +/* True if s can be turned into a number. 
*/
+{
+	char *end;
+
+	(void) strtol(s, &end, 0);
+	return end != s && *end == 0;
+}
+
+/* "Invisible" globals. */
+int asm_mode32= (sizeof(int) == 4);
+int err_code= EXIT_SUCCESS;
+/* gas2ack [-mi86|-mi386] [input-file [output-file]]; two passes over input. */
+int main(int argc, char **argv)
+{
+	void (*parse_init)(char *file);
+	asm86_t *(*get_instruction)(void);
+	void (*emit_init)(char *file, const char *banner);
+	void (*emit_instruction)(asm86_t *instr);
+	char *input_file, *output_file;
+	asm86_t *instr;
+	char banner[80];
+
+	if (argc > 1 && argv[1][0] == '-' && argv[1][1] == 'm') {
+		if (strcmp(argv[1], "-mi86") == 0) {
+			set_use16();
+		} else
+		if (strcmp(argv[1], "-mi386") == 0) {
+			set_use32();
+		} else {
+			fprintf(stderr, "asmconv: '%s': unknown machine\n",
+				argv[1]+2);
+		}
+		argc--;
+		argv++;
+	}
+
+	if (argc > 3) {
+		fprintf(stderr, "Usage: gas2ack [input-file [output-file]]\n");
+		exit(EXIT_FAILURE);
+	}
+	/* argv[i] exists only if argc > i; a missing name is passed as nil. */
+	input_file= argc < 2 ? nil : argv[1];
+	output_file= argc < 3 ? nil : argv[2];
+
+	parse_init= gnu_parse_init;
+	get_instruction= gnu_get_instruction;
+
+	emit_init= ack_emit_init;
+	emit_instruction= ack_emit_instruction;
+
+	sprintf(banner, "Translated from GNU to ACK by gas2ack");
+
+	/* First pass: get the locally defined labels, emit nothing. */
+	(*parse_init)(input_file);
+	for (;;) {
+		instr= (*get_instruction)();
+		if (instr == nil) break;
+		del_asm86(instr);
+	}
+	/* Second pass: emit; the final nil is handed to the emitter too. */
+	(*parse_init)(input_file);
+	(*emit_init)(output_file, banner);
+	for (;;) {
+		instr= (*get_instruction)();
+		(*emit_instruction)(instr);
+		if (instr == nil) break;
+		del_asm86(instr);
+	}
+	exit(err_code);
+}
diff --git a/commands/i386/gas2ack/globals.c b/commands/i386/gas2ack/globals.c
new file mode 100644
index 000000000..80f105f15
--- /dev/null
+++ b/commands/i386/gas2ack/globals.c
@@ -0,0 +1,134 @@
+/*
+ * Table of all global definitions.
Since the ack convention is to prepend
+ * syms with '_' for C interfacing, we need to know about them and add/remove
+ * the '_' as necessary.
+ */
+
+#include <ctype.h>
+#include <errno.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "asm86.h"
+
+/* this should be fine for common minix assembly files */
+#define SYM_MAX		1024
+#define SYM_MAX_LEN	64
+
+struct sym {
+	char	name[SYM_MAX_LEN];
+	int	gl;
+};
+
+static struct sym syms[SYM_MAX];
+
+static int syms_num = 0;
+/* Look n up in the table; NULL if it was never added. */
+static struct sym * sym_exists(const char * n)
+{
+	int i;
+
+	for (i = 0; i < syms_num; i++) {
+		if (strcmp(syms[i].name, n) == 0)
+			return &syms[i];
+	}
+
+	return NULL;
+}
+/* True for a local label reference: at least one digit, then 'b' or 'f'. */
+static int is_local_label_ref(const char *n)
+{
+	int i;
+	int l = strlen(n);
+
+	if (l < 2 || (n[l-1] != 'b' && n[l-1] != 'f'))
+		return 0;
+	for (i = 0; i < l - 1; i++)
+		if (!isdigit((unsigned char) n[i]))
+			return 0;
+
+	return 1;
+}
+/* True if n holds only hex digits (vacuously true for an empty string). */
+static int is_hex(const char *n)
+{
+	int i;
+	for(i = 0; n[i]; i++)
+		if (!isxdigit((unsigned char) n[i]))
+			return 0;
+	return 1;
+}
+/* True if n holds only decimal digits (vacuously true for an empty string). */
+static int is_dec(const char *n)
+{
+	int i;
+	for(i = 0; n[i]; i++)
+		if (!isdigit((unsigned char) n[i]))
+			return 0;
+	return 1;
+}
+/* True for a "0x" prefixed hex string or an all-decimal string. */
+static int is_number(const char * n)
+{
+	if (n[0] == '0' && n[1] == 'x')
+		return is_hex(n + 2);
+	else
+		return is_dec(n);
+}
+/* True if n is to be treated as global; never for numbers, labels, regs. */
+int syms_is_global(const char * n)
+{
+	struct sym *s;
+
+	if (!n || is_number(n) || is_local_label_ref(n) || isregister(n))
+		return 0;
+
+	/* if not found, it must be extern -> global */
+	if (!(s = sym_exists(n)))
+		return 1;
+
+	return s->gl;
+}
+/* Append n with flag isgl; 0 if stored or skipped as a number, else -errno. */
+static int add(const char * n, int isgl)
+{
+	if (syms_num >= SYM_MAX)
+		return -ENOMEM;
+	if (!n || strlen(n) >= SYM_MAX_LEN)
+		return -EINVAL;
+
+	/* ignore numbers */
+	if (is_number(n))
+		return 0;
+
+	strcpy(syms[syms_num].name, n);
+	syms[syms_num].gl = isgl;
+	syms_num++;
+
+	return 0;
+}
+/* Record n as a non-global symbol. */
+int syms_add(const char *n)
+{
+	return add(n, 0);
+}
+/* Record n as a global symbol. */
+int syms_add_global(const char *n)
+{
+	return add(n, 1);
+}
+/* Record every name of a ','-separated operand list as global. */
+void syms_add_global_csl(expression_t * exp)
+{
+	if (!exp)
+		return;
+
+	if (exp->operator == ',') {
+
syms_add_global_csl(exp->left); + syms_add_global_csl(exp->right); + } + else { + syms_add_global(exp->name); + } +} + diff --git a/commands/i386/gas2ack/globals.h b/commands/i386/gas2ack/globals.h new file mode 100644 index 000000000..e3e8d6e17 --- /dev/null +++ b/commands/i386/gas2ack/globals.h @@ -0,0 +1,9 @@ +#ifndef __GLOBALS_H__ +#define __GLOBALS_H__ + +int syms_is_global(const char * gl); +int syms_add(const char * gl); +int syms_add_global(const char * gl); +void syms_add_global_csl(expression_t * exp); + +#endif diff --git a/commands/i386/gas2ack/languages.h b/commands/i386/gas2ack/languages.h new file mode 100644 index 000000000..a34ec9fe4 --- /dev/null +++ b/commands/i386/gas2ack/languages.h @@ -0,0 +1,25 @@ +/* languages.h - functions that parse or emit assembly + * Author: Kees J. Bot + * 27 Dec 1993 + */ + +void ack_parse_init(char *file); +asm86_t *ack_get_instruction(void); + +void ncc_parse_init(char *file); +asm86_t *ncc_get_instruction(void); + +void gnu_parse_init(char *file); +asm86_t *gnu_get_instruction(void); + +void bas_parse_init(char *file); +asm86_t *bas_get_instruction(void); + +void ack_emit_init(char *file, const char *banner); +void ack_emit_instruction(asm86_t *instr); + +void ncc_emit_init(char *file, const char *banner); +void ncc_emit_instruction(asm86_t *instr); + +void gnu_emit_init(char *file, const char *banner); +void gnu_emit_instruction(asm86_t *instr); diff --git a/commands/i386/gas2ack/parse_gnu.c b/commands/i386/gas2ack/parse_gnu.c new file mode 100644 index 000000000..56960b51d --- /dev/null +++ b/commands/i386/gas2ack/parse_gnu.c @@ -0,0 +1,1017 @@ +/* parse_ack.c - parse GNU assembly Author: R.S. Veldema + * + * 26 Aug 1996 + */ +#define nil 0 +#include +#include +#include +#include +#include +#include "asmconv.h" +#include "token.h" +#include "asm86.h" +#include "languages.h" +#include "globals.h" + +typedef struct mnemonic { /* GNU as86 mnemonics translation table. 
*/ + char *name; + opcode_t opcode; + optype_t optype; +} mnemonic_t; + +static mnemonic_t mnemtab[] = { /* This array is sorted. */ + { ".align", DOT_ALIGN, PSEUDO }, + { ".ascii", DOT_ASCII, PSEUDO }, + { ".asciz", DOT_ASCIZ, PSEUDO }, + { ".assert", DOT_ASSERT, PSEUDO }, + { ".balign", DOT_ALIGN, PSEUDO }, + { ".base", DOT_BASE, PSEUDO }, + { ".bss", DOT_BSS, PSEUDO }, + { ".byte", DOT_DATA1, PSEUDO }, + { ".comm", DOT_COMM, PSEUDO }, + { ".data", DOT_DATA, PSEUDO }, + { ".data1", DOT_DATA1, PSEUDO }, + { ".data2", DOT_DATA2, PSEUDO }, + { ".data4", DOT_DATA4, PSEUDO }, + { ".end", DOT_END, PSEUDO }, + { ".extern", DOT_EXTERN, PSEUDO }, + { ".file", DOT_FILE, PSEUDO }, + { ".global", DOT_DEFINE, PSEUDO }, + { ".globl", DOT_DEFINE, PSEUDO }, + { ".lcomm", DOT_LCOMM, PSEUDO }, + { ".line", DOT_LINE, PSEUDO }, + { ".list", DOT_LIST, PSEUDO }, + { ".long", DOT_DATA4, PSEUDO }, + { ".nolist", DOT_NOLIST, PSEUDO }, + { ".rom", DOT_ROM, PSEUDO }, + { ".short", DOT_DATA2, PSEUDO }, + { ".space", DOT_SPACE, PSEUDO }, + { ".symb", DOT_SYMB, PSEUDO }, + { ".text", DOT_TEXT, PSEUDO }, + { ".word", DOT_DATA2, PSEUDO }, + { "aaa", AAA, WORD }, + { "aad", AAD, WORD }, + { "aam", AAM, WORD }, + { "aas", AAS, WORD }, + { "adcb", ADC, BYTE }, + { "adcl", ADC, WORD }, + { "adcw", ADC, OWORD }, + { "addb", ADD, BYTE }, + { "addl", ADD, WORD }, + { "addw", ADD, OWORD }, + { "andb", AND, BYTE }, + { "andl", AND, WORD }, + { "andw", AND, OWORD }, + { "arpl", ARPL, WORD }, + { "bound", BOUND, WORD }, + { "bsf", BSF, WORD }, + { "bsr", BSR, WORD }, + { "bswap", BSWAP, WORD }, + { "btc", BTC, WORD }, + { "btl", BT, WORD }, + { "btr", BTR, WORD }, + { "bts", BTS, WORD }, + { "btw", BT, OWORD }, + { "call", CALL, JUMP }, + { "callf", CALLF, JUMP }, + { "cbtw", CBW, OWORD }, + { "cbw", CBW, WORD }, + { "cdq", CWD, WORD }, + { "clc", CLC, WORD }, + { "cld", CLD, WORD }, + { "cli", CLI, WORD }, + { "cltd", CWD, WORD }, + { "clts", CLTS, WORD }, + { "cmc", CMC, WORD }, + { "cmpb", CMP, BYTE }, 
+ { "cmpl", CMP, WORD }, + { "cmps", CMPS, WORD }, + { "cmpsb", CMPS, BYTE }, + { "cmpsl", CMPS, OWORD }, + { "cmpsw", CMPS, WORD }, + { "cmpw", CMP, OWORD }, + { "cmpxchg", CMPXCHG, WORD }, + { "cpuid", CPUID, WORD }, + { "cwd", CWD, WORD }, + { "cwde", CBW, WORD }, + { "cwtd", CWD, OWORD }, + { "cwtl", CBW, WORD }, + { "daa", DAA, WORD }, + { "das", DAS, WORD }, + { "decb", DEC, BYTE }, + { "decl", DEC, WORD }, + { "decw", DEC, OWORD }, + { "divb", DIV, BYTE }, + { "divl", DIV, WORD }, + { "divw", DIV, OWORD }, + { "enter", ENTER, WORD }, + { "f2xm1", F2XM1, WORD }, + { "fabs", FABS, WORD }, + { "fadd", FADD, WORD }, + { "faddd", FADDD, WORD }, + { "faddp", FADDP, WORD }, + { "fadds", FADDS, WORD }, + { "fbld", FBLD, WORD }, + { "fbstp", FBSTP, WORD }, + { "fchs", FCHS, WORD }, + { "fcomd", FCOMD, WORD }, + { "fcompd", FCOMPD, WORD }, + { "fcompp", FCOMPP, WORD }, + { "fcomps", FCOMPS, WORD }, + { "fcoms", FCOMS, WORD }, + { "fcos", FCOS, WORD }, + { "fdecstp", FDECSTP, WORD }, + { "fdivd", FDIVD, WORD }, + { "fdivp", FDIVP, WORD }, + { "fdivrd", FDIVRD, WORD }, + { "fdivrp", FDIVRP, WORD }, + { "fdivrs", FDIVRS, WORD }, + { "fdivs", FDIVS, WORD }, + { "ffree", FFREE, WORD }, + { "fiaddl", FIADDL, WORD }, + { "fiadds", FIADDS, WORD }, + { "ficom", FICOM, WORD }, + { "ficomp", FICOMP, WORD }, + { "fidivl", FIDIVL, WORD }, + { "fidivrl", FIDIVRL, WORD }, + { "fidivrs", FIDIVRS, WORD }, + { "fidivs", FIDIVS, WORD }, + { "fildl", FILDL, WORD }, + { "fildq", FILDQ, WORD }, + { "filds", FILDS, WORD }, + { "fimull", FIMULL, WORD }, + { "fimuls", FIMULS, WORD }, + { "fincstp", FINCSTP, WORD }, + { "fistl", FISTL, WORD }, + { "fistp", FISTP, WORD }, + { "fists", FISTS, WORD }, + { "fisubl", FISUBL, WORD }, + { "fisubrl", FISUBRL, WORD }, + { "fisubrs", FISUBRS, WORD }, + { "fisubs", FISUBS, WORD }, + { "fld1", FLD1, WORD }, + { "fldcw", FLDCW, WORD }, + { "fldd", FLDD, WORD }, + { "fldenv", FLDENV, WORD }, + { "fldl2e", FLDL2E, WORD }, + { "fldl2t", FLDL2T, WORD }, + { 
"fldlg2", FLDLG2, WORD }, + { "fldln2", FLDLN2, WORD }, + { "fldpi", FLDPI, WORD }, + { "flds", FLDS, WORD }, + { "fldx", FLDX, WORD }, + { "fldz", FLDZ, WORD }, + { "fmuld", FMULD, WORD }, + { "fmulp", FMULP, WORD }, + { "fmuls", FMULS, WORD }, + { "fnclex", FCLEX, WORD }, + { "fninit", FINIT, WORD }, + { "fnop", FNOP, WORD }, + { "fnsave", FSAVE, WORD }, + { "fnstcw", FSTCW, WORD }, + { "fnstenv", FSTENV, WORD }, + { "fpatan", FPATAN, WORD }, + { "fprem", FPREM, WORD }, + { "fprem1", FPREM1, WORD }, + { "fptan", FPTAN, WORD }, + { "frndint", FRNDINT, WORD }, + { "frstor", FRSTOR, WORD }, + { "fscale", FSCALE, WORD }, + { "fsin", FSIN, WORD }, + { "fsincos", FSINCOS, WORD }, + { "fsqrt", FSQRT, WORD }, + { "fstd", FSTD, WORD }, + { "fstpd", FSTPD, WORD }, + { "fstps", FSTPS, WORD }, + { "fstpx", FSTPX, WORD }, + { "fsts", FSTS, WORD }, + { "fstsw", FSTSW, WORD }, + { "fsubd", FSUBD, WORD }, + { "fsubp", FSUBP, WORD }, + { "fsubpr", FSUBPR, WORD }, + { "fsubrd", FSUBRD, WORD }, + { "fsubrs", FSUBRS, WORD }, + { "fsubs", FSUBS, WORD }, + { "ftst", FTST, WORD }, + { "fucom", FUCOM, WORD }, + { "fucomp", FUCOMP, WORD }, + { "fucompp", FUCOMPP, WORD }, + { "fxam", FXAM, WORD }, + { "fxch", FXCH, WORD }, + { "fxtract", FXTRACT, WORD }, + { "fyl2x", FYL2X, WORD }, + { "fyl2xp1", FYL2XP1, WORD }, + { "hlt", HLT, WORD }, + { "idivb", IDIV, BYTE }, + { "idivl", IDIV, WORD }, + { "idivw", IDIV, OWORD }, + { "imulb", IMUL, BYTE }, + { "imull", IMUL, WORD }, + { "imulw", IMUL, OWORD }, + { "inb", IN, BYTE }, + { "incb", INC, BYTE }, + { "incl", INC, WORD }, + { "incw", INC, OWORD }, + { "inl", IN, WORD }, + { "insb", INS, BYTE }, + { "insl", INS, WORD }, + { "insw", INS, OWORD }, + { "int", INT, WORD }, + { "into", INTO, JUMP }, + { "invd", INVD, WORD }, + { "invlpg", INVLPG, WORD }, + { "inw", IN, OWORD }, + { "iret", IRET, JUMP }, + { "iretd", IRETD, JUMP }, + { "ja", JA, JUMP }, + { "jae", JAE, JUMP }, + { "jb", JB, JUMP }, + { "jbe", JBE, JUMP }, + { "jc", JB, JUMP }, + { 
"jcxz", JCXZ, JUMP }, + { "je", JE, JUMP }, + { "jecxz", JCXZ, JUMP }, + { "jg", JG, JUMP }, + { "jge", JGE, JUMP }, + { "jl", JL, JUMP }, + { "jle", JLE, JUMP }, + { "jmp", JMP, JUMP }, + { "jmpf", JMPF, JUMP }, + { "jna", JBE, JUMP }, + { "jnae", JB, JUMP }, + { "jnb", JAE, JUMP }, + { "jnbe", JA, JUMP }, + { "jnc", JAE, JUMP }, + { "jne", JNE, JUMP }, + { "jng", JLE, JUMP }, + { "jnge", JL, JUMP }, + { "jnl", JGE, JUMP }, + { "jnle", JG, JUMP }, + { "jno", JNO, JUMP }, + { "jnp", JNP, JUMP }, + { "jns", JNS, JUMP }, + { "jnz", JNE, JUMP }, + { "jo", JO, JUMP }, + { "jp", JP, JUMP }, + { "js", JS, JUMP }, + { "jz", JE, JUMP }, + { "lahf", LAHF, WORD }, + { "lar", LAR, WORD }, + { "lds", LDS, WORD }, + { "leal", LEA, WORD }, + { "leave", LEAVE, WORD }, + { "leaw", LEA, OWORD }, + { "les", LES, WORD }, + { "lfs", LFS, WORD }, + { "lgdt", LGDT, WORD }, + { "lgs", LGS, WORD }, + { "lidt", LIDT, WORD }, + { "ljmp", JMPF, JUMP }, + { "ljmpw", JMPF, JUMP16 }, + { "lldt", LLDT, WORD }, + { "lmsw", LMSW, WORD }, + { "lock", LOCK, WORD }, + { "lods", LODS, WORD }, + { "lodsb", LODS, BYTE }, + { "loop", LOOP, JUMP }, + { "loope", LOOPE, JUMP }, + { "loopne", LOOPNE, JUMP }, + { "loopnz", LOOPNE, JUMP }, + { "loopz", LOOPE, JUMP }, + { "lretw", RETF, JUMP16 }, + { "lsl", LSL, WORD }, + { "lss", LSS, WORD }, + { "ltr", LTR, WORD }, + { "movb", MOV, BYTE }, + { "movl", MOV, WORD }, + { "movsb", MOVS, BYTE }, + { "movsbl", MOVSXB, WORD }, + { "movsbw", MOVSXB, OWORD }, + { "movsl", MOVS, WORD }, + { "movsw", MOVS, OWORD }, + { "movswl", MOVSX, WORD }, + { "movw", MOV, OWORD }, + { "movzbl", MOVZXB, WORD }, + { "movzbw", MOVZXB, OWORD }, + { "movzwl", MOVZX, WORD }, + { "mulb", MUL, BYTE }, + { "mull", MUL, WORD }, + { "mulw", MUL, OWORD }, + { "negb", NEG, BYTE }, + { "negl", NEG, WORD }, + { "negw", NEG, OWORD }, + { "nop", NOP, WORD }, + { "notb", NOT, BYTE }, + { "notl", NOT, WORD }, + { "notw", NOT, OWORD }, + { "orb", OR, BYTE }, + { "orl", OR, WORD }, + { "orw", OR, OWORD 
}, + { "outb", OUT, BYTE }, + { "outl", OUT, WORD }, + { "outsb", OUTS, BYTE }, + { "outsl", OUTS, WORD }, + { "outsw", OUTS, OWORD }, + { "outw", OUT, OWORD }, + { "pop", POP, WORD }, + { "popa", POPA, WORD }, + { "popal", POPAD, WORD }, + { "popf", POPF, WORD }, + { "popfl", POPF, WORD }, + { "popl", POP, WORD }, + { "popw", POP, OWORD }, + { "push", PUSH, WORD }, + { "pusha", PUSHA, WORD }, + { "pushal", PUSHAD, WORD }, + { "pushf", PUSHF, WORD }, + { "pushl", PUSH, WORD }, + { "pushw", PUSH, OWORD }, + { "rclb", RCL, BYTE }, + { "rcll", RCL, WORD }, + { "rclw", RCL, OWORD }, + { "rcrb", RCR, BYTE }, + { "rcrl", RCR, WORD }, + { "rcrw", RCR, OWORD }, + { "ret", RET, JUMP }, + { "retf", RETF, JUMP }, + { "rolb", ROL, BYTE }, + { "roll", ROL, WORD }, + { "rolw", ROL, OWORD }, + { "rorb", ROR, BYTE }, + { "rorl", ROR, WORD }, + { "rorw", ROR, OWORD }, + { "sahf", SAHF, WORD }, + { "salb", SAL, BYTE }, + { "sall", SAL, WORD }, + { "salw", SAL, OWORD }, + { "sarb", SAR, BYTE }, + { "sarl", SAR, WORD }, + { "sarw", SAR, OWORD }, + { "sbbb", SBB, BYTE }, + { "sbbl", SBB, WORD }, + { "sbbw", SBB, OWORD }, + { "scasb", SCAS, BYTE }, + { "scasl", SCAS, WORD }, + { "scasw", SCAS, OWORD }, + { "seta", SETA, BYTE }, + { "setae", SETAE, BYTE }, + { "setb", SETB, BYTE }, + { "setbe", SETBE, BYTE }, + { "sete", SETE, BYTE }, + { "setg", SETG, BYTE }, + { "setge", SETGE, BYTE }, + { "setl", SETL, BYTE }, + { "setna", SETBE, BYTE }, + { "setnae", SETB, BYTE }, + { "setnb", SETAE, BYTE }, + { "setnbe", SETA, BYTE }, + { "setne", SETNE, BYTE }, + { "setng", SETLE, BYTE }, + { "setnge", SETL, BYTE }, + { "setnl", SETGE, BYTE }, + { "setnle", SETG, BYTE }, + { "setno", SETNO, BYTE }, + { "setnp", SETNP, BYTE }, + { "setns", SETNS, BYTE }, + { "seto", SETO, BYTE }, + { "setp", SETP, BYTE }, + { "sets", SETS, BYTE }, + { "setz", SETE, BYTE }, + { "sgdt", SGDT, WORD }, + { "shlb", SHL, BYTE }, + { "shldl", SHLD, WORD }, + { "shll", SHL, WORD }, + { "shlw", SHL, OWORD }, + { "shrb", SHR, 
BYTE }, + { "shrdl", SHRD, WORD }, + { "shrl", SHR, WORD }, + { "shrw", SHR, OWORD }, + { "sidt", SIDT, WORD }, + { "sldt", SLDT, WORD }, + { "smsw", SMSW, WORD }, + { "stc", STC, WORD }, + { "std", STD, WORD }, + { "sti", STI, WORD }, + { "stosb", STOS, BYTE }, + { "stosl", STOS, WORD }, + { "stosw", STOS, OWORD }, + { "str", STR, WORD }, + { "subb", SUB, BYTE }, + { "subl", SUB, WORD }, + { "subw", SUB, OWORD }, + { "testb", TEST, BYTE }, + { "testl", TEST, WORD }, + { "testw", TEST, OWORD }, + { "verr", VERR, WORD }, + { "verw", VERW, WORD }, + { "wait", WAIT, WORD }, + { "wbinvd", WBINVD, WORD }, + { "xadd", XADD, WORD }, + { "xchgb", XCHG, BYTE }, + { "xchgl", XCHG, WORD }, + { "xchgw", XCHG, OWORD }, + { "xlat", XLAT, WORD }, + { "xorb", XOR, BYTE }, + { "xorl", XOR, WORD }, + { "xorw", XOR, OWORD }, +}; + +void gnu_parse_init(char *file) +/* Prepare parsing of an GNU assembly file. */ +{ + tok_init(file, '#'); +} + +static void zap(void) +/* An error, zap the rest of the line. */ +{ + token_t *t; + + while ((t= get_token(0))->type != T_EOF && t->symbol != ';' + && t->type != T_COMMENT) + skip_token(1); +} + +/* same as in ACK */ +static int zap_unknown(asm86_t *a) +/* An error, zap the rest of the line. */ +{ + token_t *t; +#define MAX_ASTR 4096 + char astr[MAX_ASTR]; + unsigned astr_len = 0; + + astr[astr_len++] = '\t'; + while ((t= get_token(0))->type != T_EOF && t->symbol != ';' + && t->type != T_COMMENT) { + switch(t->type) { + case T_CHAR: + astr[astr_len++] = t->symbol; + break; + case T_WORD: + case T_STRING: + strncpy(astr + astr_len, t->name, t->len); + astr_len += t->len; + break; + + } + skip_token(1); + } + astr[astr_len++] = '\0'; + + a->raw_string = malloc(astr_len); + if (!a->raw_string) + return -1; + + strcpy(a->raw_string, astr); + + return 0; +} + +static mnemonic_t *search_mnem(char *name) +/* Binary search for a mnemonic. (That's why the table is sorted.) 
*/ +{ + int low, mid, high; + int cmp; + mnemonic_t *m; + char name_buf[64]; + int brk = 0; + +try_long: + low= 0; + high= arraysize(mnemtab)-1; + while (low <= high) { + mid= (low + high) / 2; + m= &mnemtab[mid]; + + if ((cmp= strcmp(name, m->name)) == 0) return m; + + if (cmp < 0) high= mid-1; else low= mid+1; + } + + /* + * in gnu the modifier 'l' is usually omitted, however we need the + * information about the arguments length. Therefore we try if we know + * such instruction. It covers most of the cases of unknown instructions + */ + if (!brk) { + int len = strlen(name); + strcpy(name_buf, name); + name_buf[len] = 'l'; + name_buf[len + 1] = '\0'; + name = name_buf; + brk = 1; + goto try_long; + } + + return nil; +} + +static expression_t *gnu_get_C_expression(int *pn) +/* Read a "C-like" expression. Note that we don't worry about precedence, + * the expression is printed later like it is read. If the target language + * does not have all the operators (like ~) then this has to be repaired by + * changing the source file. (No problem, you still have one source file + * to maintain, not two.) + */ +{ + expression_t *e, *a1, *a2; + token_t *t; + + if ((t= get_token(*pn))->symbol == '(') { + /* ( expr ): grouping. */ + (*pn)++; + if ((a1= gnu_get_C_expression(pn)) == nil) return nil; + if (get_token(*pn)->symbol != ')') { + parse_err(1, t, "missing )\n"); + del_expr(a1); + return nil; + } + (*pn)++; + e= new_expr(); + e->operator= '['; + e->middle= a1; + } else + if (t->type == T_WORD || t->type == T_STRING) { + /* Label, number, or string. */ + e= new_expr(); + e->operator= t->type == T_WORD ? 'W' : 'S'; + e->name= allocate(nil, (t->len+1) * sizeof(e->name[0])); + memcpy(e->name, t->name , t->len+1); + e->len= t->len; + (*pn)++; + } else + if (t->symbol == '+' || t->symbol == '-' || t->symbol == '~') { + /* Unary operator. 
*/ + (*pn)++; + if ((a1= gnu_get_C_expression(pn)) == nil) return nil; + e= new_expr(); + e->operator= t->symbol; + e->middle= a1; + } else { + parse_err(1, t, "expression syntax error\n"); + return nil; + } + + switch ((t= get_token(*pn))->symbol) { + case '%': + case '+': + case '-': + case '*': + case '/': + case '&': + case '|': + case '^': + case S_LEFTSHIFT: + case S_RIGHTSHIFT: + (*pn)++; + a1= e; + if ((a2= gnu_get_C_expression(pn)) == nil) { + del_expr(a1); + return nil; + } + e= new_expr(); + e->operator= t->symbol; + e->left= a1; + e->right= a2; + } + return e; +} + +static expression_t *gnu_get_operand(asm86_t * a, int *pn, int deref) +/* Get something like: $immed, memory, offset(%base,%index,scale), or simpler. */ +{ + expression_t *e, *offset, *base, *index; + token_t *t; + int c; + + if (get_token(*pn)->symbol == '$') { + /* An immediate value. */ + (*pn)++; + return gnu_get_C_expression(pn); + } + + if (get_token(*pn)->symbol == '*') { + (*pn)++; + deref = 1; +#if 0 + if ((offset= gnu_get_operand(a, pn, deref)) == nil) return nil; +#if 0 + e= new_expr(); + e->operator= '('; + e->middle= offset; + return e; +#endif + return offset; +#endif + } + + if ((get_token(*pn)->symbol == '%') + && (t= get_token(*pn + 1))->type == T_WORD + && isregister(t->name) + ) { + /* A register operand. */ + (*pn)+= 2; + e= new_expr(); + e->operator= 'W'; + e->name= copystr(t->name); + return e; + } + + /* Offset? */ + if (get_token(*pn)->symbol != '(' + || get_token(*pn + 1)->symbol != '%') { + /* There is an offset. */ + if ((offset= gnu_get_C_expression(pn)) == nil) return nil; + } else { + /* No offset. */ + offset= nil; + } + + /* (%base,%index,scale) ? */ + base= index= nil; + if (get_token(*pn)->symbol == '(') { + (*pn)++; + + /* %base ? */ + if (get_token(*pn)->symbol == '%' + && (t= get_token(*pn + 1))->type == T_WORD + && isregister(t->name) + ) { + /* A base register expression. 
*/ + base= new_expr(); + base->operator= 'B'; + base->name= copystr(t->name); + (*pn)+= 2; + } + + if (get_token(*pn)->symbol == ',') (*pn)++; + + /* %index ? */ + if (get_token(*pn)->symbol == '%' + && (t= get_token(*pn + 1))->type == T_WORD + && isregister(t->name) + ) { + /* A index register expression. */ + index= new_expr(); + index->operator= '1'; /* for now */ + index->name= copystr(t->name); + (*pn)+= 2; + } + + if (get_token(*pn)->symbol == ',') (*pn)++; + + /* scale ? */ + if ((base != nil || index != nil) + && (t= get_token(*pn))->type == T_WORD + && strchr("1248", t->name[0]) != nil + && t->name[1] == 0 + ) { + if (index == nil) { + /* Base is really an index register. */ + index= base; + base= nil; + } + index->operator= t->name[0]; + (*pn)++; + } + + if (get_token(*pn)->symbol == ')') { + /* Ending paren. */ + (*pn)++; + } else { + /* Alas. */ + parse_err(1, t, "operand syntax error\n"); + del_expr(offset); + del_expr(base); + del_expr(index); + return nil; + } + } + + if (base == nil && index == nil) { + if (deref) { + /* Return a lone offset as (offset). */ + e= new_expr(); + e->operator= '('; + e->middle= offset; + } else { + /* Return a lone offset as is. */ + e= offset; + } + } else { + e= new_expr(); + e->operator= 'O'; + e->left= offset; + + e->middle= base; + e->right= index; + } + return e; +} + +static expression_t *gnu_get_oplist(asm86_t * a, int *pn, int deref) +/* Get a comma (or colon for jmpf and callf) separated list of instruction + * operands. 
+ */ +{ + expression_t *e, *o1, *o2; + token_t *t; + int sreg; + + if ((e= gnu_get_operand(a, pn, deref)) == nil) return nil; + + t = get_token(*pn); + + if (t->symbol == ':' && IS_REGSEG(sreg = isregister(e->name))) { + a->seg = segreg2seg(sreg); + del_expr(e); + (*pn)++; + e = gnu_get_oplist(a, pn, deref); + } + else if (t->symbol == ',' || t->symbol == ':') { + o1= e; + (*pn)++; + if ((o2= gnu_get_oplist(a, pn, deref)) == nil) { + del_expr(o1); + return nil; + } + e= new_expr(); + e->operator= ','; + e->left= o2; + e->right= o1; + } + return e; +} + + +static asm86_t *gnu_get_statement(void) +/* Get a pseudo op or machine instruction with arguments. */ +{ + token_t *t= get_token(0); + token_t *tn; + asm86_t *a; + mnemonic_t *m; + int n; + int prefix_seen; + int deref; + + assert(t->type == T_WORD); + + a= new_asm86(); + + /* Process instruction prefixes. */ + for (prefix_seen= 0;; prefix_seen= 1) { + if (strcmp(t->name, "rep") == 0 + || strcmp(t->name, "repe") == 0 + || strcmp(t->name, "repne") == 0 + || strcmp(t->name, "repz") == 0 + || strcmp(t->name, "repnz") == 0 + ) { + if (a->rep != ONCE) { + parse_err(1, t, + "can't have more than one rep\n"); + } + switch (t->name[3]) { + case 0: a->rep= REP; break; + case 'e': + case 'z': a->rep= REPE; break; + case 'n': a->rep= REPNE; break; + } + } else + if (!prefix_seen) { + /* No prefix here, get out! */ + break; + } else { + /* No more prefixes, next must be an instruction. */ + if (t->type != T_WORD + || (m= search_mnem(t->name)) == nil + || m->optype == PSEUDO + ) { + parse_err(1, t, + "machine instruction expected after instruction prefix\n"); + del_asm86(a); + return nil; + } + break; + } + + /* Skip the prefix and extra newlines. */ + do { + skip_token(1); + } while ((t= get_token(0))->symbol == ';'); + } + + /* All the readahead being done upsets the line counter. */ + a->line= t->line; + + /* Read a machine instruction or pseudo op. 
*/ + if ((m= search_mnem(t->name)) == nil) { + /* we assume that unknown stuff is part of unresolved macro */ + a->opcode = UNKNOWN; + if (zap_unknown(a)) { + parse_err(1, t, "unknown instruction '%s'\n", t->name); + del_asm86(a); + return nil; + } + return a; + } + a->opcode= m->opcode; + a->optype= m->optype; + a->oaz= 0; + if (a->optype == OWORD) { + a->oaz|= OPZ; + a->optype= WORD; + } + else if (a->optype == JUMP16) { + a->oaz|= OPZ; + a->optype= JUMP; + } + + switch (a->opcode) { + case IN: + case OUT: + case INT: + deref= 0; + break; + default: + deref= (a->optype >= BYTE); + } + n= 1; + if (get_token(1)->type != T_COMMENT && get_token(1)->symbol != ';' + && (a->args= gnu_get_oplist(a, &n, deref)) == nil) { + del_asm86(a); + return nil; + } + tn = get_token(n); + if (tn->type == T_COMMENT) { + a->raw_string = malloc(tn->len + 1); + if (!a->raw_string) + return NULL; + + strcpy(a->raw_string, tn->name); + } else + if (get_token(n)->symbol != ';') { + parse_err(1, t, "garbage at end of instruction\n"); + del_asm86(a); + return nil; + } + if (!is_pseudo(a->opcode)) { + /* GNU operand order is the other way around. */ + expression_t *e, *t; + + e= a->args; + while (e != nil && e->operator == ',') { + t= e->right; e->right= e->left; e->left= t; + e= e->left; + } + } + switch (a->opcode) { + case DOT_ALIGN: + /* Delete two argument .align, because ACK can't do it. 
+ */ + if (a->args == nil || a->args->operator != 'W') { + del_asm86(a); + return nil; + } + if (a->args != nil && a->args->operator == 'W' + && isanumber(a->args->name) + ) { + unsigned n; + char num[sizeof(int) * CHAR_BIT / 3 + 1]; + + n= strtoul(a->args->name, nil, 0); + sprintf(num, "%u", n); + deallocate(a->args->name); + a->args->name= copystr(num); + } + break; + case DOT_DEFINE: + case DOT_EXTERN: + syms_add_global_csl(a->args); + break; + case DOT_COMM: + syms_add_global(a->args->left->name); + break; + case DOT_LCOMM: + syms_add(a->args->left->name); + break; + case JMPF: + case CALLF: + /*FALL THROUGH*/ + case JMP: + case CALL: + break; + default:; + } + skip_token(n+1); + return a; +} + + +asm86_t *gnu_get_instruction(void) +{ + asm86_t *a= nil; + expression_t *e; + token_t *t; + + while ((t= get_token(0))->symbol == ';' || t->symbol == '/') { + zap(); /* if a comment started by a '/' */ + skip_token(1); + } + + if (t->type == T_EOF) return nil; + + if (t->type == T_COMMENT || t->type == T_C_PREPROCESSOR) { + + a = new_asm86(); + if (t->type == T_COMMENT) + a->opcode = COMMENT; + else + a->opcode = C_PREPROCESSOR; + + a->raw_string = malloc(t->len + 1); + if (!a->raw_string) + return NULL; + + strcpy(a->raw_string, t->name); + skip_token(1); + return a; + } + + if (t->symbol == '#') { + /* Preprocessor line and file change. */ + + if ((t= get_token(1))->type != T_WORD || !isanumber(t->name) + || get_token(2)->type != T_STRING + ) { + parse_err(1, t, "file not preprocessed?\n"); + zap(); + } else { + set_file(get_token(2)->name, + strtol(get_token(1)->name, nil, 0) - 1); + + /* GNU CPP adds extra cruft, simply zap the line. */ + zap(); + } + a= gnu_get_instruction(); + } else + if (t->type == T_WORD && get_token(1)->symbol == ':') { + /* A label definition. 
*/ + + a= new_asm86(); + a->line= t->line; + a->opcode= DOT_LABEL; + a->optype= PSEUDO; + a->args= e= new_expr(); + e->operator= ':'; + e->name= copystr(t->name); + syms_add(t->name); + skip_token(2); + } else + if (t->type == T_WORD && get_token(1)->symbol == '=') { + int n= 2; + + if ((e= gnu_get_C_expression(&n)) == nil) { + zap(); + a= gnu_get_instruction(); + } else + if (get_token(n)->type != T_COMMENT && get_token(n)->symbol != ';') { + parse_err(1, t, "garbage after assignment\n"); + zap(); + a= gnu_get_instruction(); + } else { + a= new_asm86(); + if (get_token(n)->type == T_COMMENT) { + token_t *c = get_token(n); + + a->raw_string = malloc(c->len + 1); + if (!a->raw_string) + return NULL; + + strcpy(a->raw_string, c->name); + } + a->line= t->line; + a->opcode= DOT_EQU; + a->optype= PSEUDO; + a->args= new_expr(); + a->args->operator= '='; + a->args->name= copystr(t->name); + syms_add(t->name); + a->args->middle= e; + skip_token(n+1); + } + } else + if (t->type == T_WORD) { + if ((a= gnu_get_statement()) == nil) { + zap(); + a= gnu_get_instruction(); + } + } else { + parse_err(1, t, "syntax error\n"); + zap(); + a= gnu_get_instruction(); + } + return a; +} diff --git a/commands/i386/gas2ack/token.h b/commands/i386/gas2ack/token.h new file mode 100644 index 000000000..fbf8ae68f --- /dev/null +++ b/commands/i386/gas2ack/token.h @@ -0,0 +1,32 @@ +/* token.h - token definition Author: Kees J. Bot + * 13 Dec 1993 + */ + +typedef enum toktype { + T_EOF, + T_CHAR, + T_WORD, + T_STRING, + T_COMMENT, + T_C_PREPROCESSOR +} toktype_t; + +typedef struct token { + struct token *next; + long line; + toktype_t type; + int symbol; /* Single character symbol. */ + char *name; /* Word, number, etc. */ + size_t len; /* Length of string. 
*/ +} token_t; + +#define S_LEFTSHIFT 0x100 /* << */ +#define S_RIGHTSHIFT 0x101 /* >> */ + +void set_file(char *file, long line); +void get_file(char **file, long *line); +void parse_err(int err, token_t *where, const char *fmt, ...); +void parse_warn(int err, token_t *t, const char *fmt, ...); +void tok_init(char *file, int comment); +token_t *get_token(int n); +void skip_token(int n); diff --git a/commands/i386/gas2ack/tokenize.c b/commands/i386/gas2ack/tokenize.c new file mode 100644 index 000000000..51edbdf3a --- /dev/null +++ b/commands/i386/gas2ack/tokenize.c @@ -0,0 +1,410 @@ +/* tokenize.c - split input into tokens Author: Kees J. Bot + * 13 Dec 1993 + */ +#define nil 0 +#include +#include +#include +#include +#include +#include "asmconv.h" +#include "token.h" + +static FILE *tf; +static char *tfile; +static char *orig_tfile; +static int tcomment; +static int tc; +static long tline; +static token_t *tq; +static int nl, prevnl; + +static int old_n= 0; /* To speed up n, n+1, n+2, ... accesses. */ +static token_t **old_ptq= &tq; + +static void tok_reset(void) +{ + nl = prevnl = 0; + tline = 0; + if (tf) + fclose(tf); /* ignore error */ + tf = NULL; + old_n = 0; + old_ptq = &tq; + tq = NULL; +} + +static void readtc(void) +/* Read one character from the input file and put it in the global 'tc'. */ +{ + if (nl) tline++; + if ((tc= getc(tf)) == EOF && ferror(tf)) fatal(orig_tfile); + prevnl = nl; + nl= (tc == '\n'); +} + +static void unreadtc(int tc) +{ + if (tc == '\n') + tline--; + nl = prevnl; + ungetc(tc, tf); +} + +void set_file(char *file, long line) +/* Set file name and line number, changed by a preprocessor trick. */ +{ + deallocate(tfile); + tfile= allocate(nil, (strlen(file) + 1) * sizeof(tfile[0])); + strcpy(tfile, file); + tline= line; +} + +void get_file(char **file, long *line) +/* Get file name and line number. */ +{ + *file= tfile; + *line= tline; +} + +void parse_err(int err, token_t *t, const char *fmt, ...) +/* Report a parsing error. 
static int isidentchar(int c)
/* Can c be part of a word?  Words are made of ASCII letters, digits, dots
 * and underscores.
 */
{
	if (c == '.' || c == '_') return 1;
	if (c >= '0' && c <= '9') return 1;
	if (c >= 'A' && c <= 'Z') return 1;
	return c >= 'a' && c <= 'z';
}
*/ +{ + token_t *s; + int quote; + char *str; + size_t i, len; + int n, j; + int seen; + + quote= tc; + readtc(); + + i= 0; + len= 16; + str= allocate(nil, len * sizeof(str[0])); + + while (tc != quote && tc != '\n' && tc != EOF) { + seen= -1; + if (tc == '\\') { + readtc(); + if (tc == '\n' || tc == EOF) break; + + switch (tc) { + case 'a': tc= '\a'; break; + case 'b': tc= '\b'; break; + case 'f': tc= '\f'; break; + case 'n': tc= '\n'; break; + case 'r': tc= '\r'; break; + case 't': tc= '\t'; break; + case 'v': tc= '\v'; break; + case 'x': + n= 0; + for (j= 0; j < 3; j++) { + readtc(); + if (between('0', tc, '9')) + tc-= '0' + 0x0; + else + if (between('A', tc, 'A')) + tc-= 'A' + 0xA; + else + if (between('a', tc, 'a')) + tc-= 'a' + 0xa; + else { + seen= tc; + break; + } + n= n*0x10 + tc; + } + tc= n; + break; + default: + if (!between('0', tc, '9')) break; + n= 0; + for (j= 0; j < 3; j++) { + if (between('0', tc, '9')) + tc-= '0'; + else { + seen= tc; + break; + } + n= n*010 + tc; + readtc(); + } + tc= n; + } + } + str[i++]= tc; + if (i == len) str= allocate(str, (len*= 2) * sizeof(str[0])); + + if (seen < 0) readtc(); else tc= seen; + } + + if (tc == quote) { + readtc(); + } else { + parse_err(1, nil, "string contains newline\n"); + } + str[i]= 0; + str= allocate(str, (i+1) * sizeof(str[0])); + s= new_token(); + s->type= T_STRING; + s->name= str; + s->len= i; + return s; +} + +#define MAX_TOKEN_STR_SIZE 4096 +static char token_str[MAX_TOKEN_STR_SIZE]; +static unsigned token_sz; + +token_t *get_token(int n) +/* Return the n-th token on the input queue. */ +{ + token_t *t, **ptq; + + assert(n >= 0); + + if (0 && n >= old_n) { + /* Go forward from the previous point. */ + n-= old_n; + old_n+= n; + ptq= old_ptq; + } else { + /* Restart from the head of the queue. 
*/ + old_n= n; + ptq= &tq; + } + + for (;;) { + if ((t= *ptq) == nil) { + /* consume white spaces */ + while (isspace(tc)) + readtc(); + /* read long C comments */ + if (tc == '/') { + readtc(); + if (tc != '*') { + unreadtc(tc); + tc = '/'; + } + else { + token_sz = 2; + t = new_token(); + t->type = T_COMMENT; + token_str[0] = '/'; + token_str[1] = '*'; + readtc(); + for(;;) { + while (tc != EOF && tc != '/') { + token_str[token_sz++] = (char)tc; + readtc(); + } + if (tc == '/') { + if (token_str[token_sz - 1] == '*') { + token_str[token_sz++] = (char)tc; + readtc(); + break; + } + token_str[token_sz++] = (char)tc; + readtc(); + } + else if (tc == EOF) + break; + } + token_str[token_sz] = 0; + t->name= malloc(token_sz + 1); + if (t->name == NULL) { + fprintf(stderr, "malloc() failed\n"); + exit(-1); + } + strcpy(t->name, token_str); + t->len = token_sz; + goto token_found; + } + } + if (iscomment(tc) || tc == '#') { + t = new_token(); + if (iscomment(tc)) + t->type = T_COMMENT; + else + t->type = T_C_PREPROCESSOR; + token_sz = 0; + for(;;) { + while (tc != '\n' && tc != EOF) { + token_str[token_sz++] = (char)tc; + readtc(); + } + if (t->type == T_C_PREPROCESSOR && + token_str[token_sz - 1] == '\\') { + token_str[token_sz++] = '\n'; + tline++; + readtc(); + } + else + break; + } + token_str[token_sz] = 0; + t->name= malloc(token_sz + 1); + if (t->name == NULL) { + fprintf(stderr, "malloc() failed\n"); + exit(-1); + } + strcpy(t->name, token_str); + t->len = token_sz; + goto token_found; + } + + if (tc == EOF) { + t= new_token(); + t->type= T_EOF; + } else + if (isidentchar(tc)) { + t= get_word(); + } else + if (tc == '\'' || tc == '"') { + t= get_string(); + } else { + if (tc == '\n') tc= ';'; + t= new_token(); + t->type= T_CHAR; + t->symbol= tc; + readtc(); + if (t->symbol == '<' && tc == '<') { + t->symbol= S_LEFTSHIFT; + readtc(); + } else + if (t->symbol == '>' && tc == '>') { + t->symbol= S_RIGHTSHIFT; + readtc(); + } + } +token_found: + *ptq= t; + } + if (n 
== 0) break; + n--; + ptq= &t->next; + } + old_ptq= ptq; + return t; +} + +void skip_token(int n) +/* Remove n tokens from the input queue. One is not allowed to skip unread + * tokens. + */ +{ + token_t *junk; + + assert(n >= 0); + + while (n > 0) { + assert(tq != nil); + + junk= tq; + tq= tq->next; + deallocate(junk->name); + deallocate(junk); + n--; + } + /* Reset the old reference. */ + old_n= 0; + old_ptq= &tq; +} -- 2.44.0