--- /dev/null
+/* emit_ack.c - emit ACK assembly Author: Kees J. Bot
+ * emit NCC assembly 27 Dec 1993
+ */
+#define nil 0
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdarg.h>
+#include <string.h>
+#include <assert.h>
+#include "asmconv.h"
+#include "token.h"
+#include "asm86.h"
+#include "languages.h"
+#include "globals.h"
+
+typedef struct mnemonic { /* ACK as86 mnemonics translation table. */
+ opcode_t opcode; /* Internal opcode; used as the index into opcode2name_tab[]. */
+ char *name; /* Mnemonic text; a '%' is emitted as 'b' for BYTE operands and dropped otherwise. */
+} mnemonic_t;
+
+/* Opcode to output mnemonic table. ack_emit_init() copies every entry into
+ * opcode2name_tab[] and asserts that no opcode is listed twice. A '%' in a
+ * name is a placeholder: ack_emit_instruction() prints 'b' in its place for
+ * BYTE sized operands and nothing otherwise. Note that DOT_LCOMM is emitted
+ * as ".comm", just like DOT_COMM.
+ */
+static mnemonic_t mnemtab[] = {
+ { AAA, "aaa" },
+ { AAD, "aad" },
+ { AAM, "aam" },
+ { AAS, "aas" },
+ { ADC, "adc%" },
+ { ADD, "add%" },
+ { AND, "and%" },
+ { ARPL, "arpl" },
+ { BOUND, "bound" },
+ { BSF, "bsf" },
+ { BSR, "bsr" },
+ { BSWAP, "bswap" },
+ { BT, "bt" },
+ { BTC, "btc" },
+ { BTR, "btr" },
+ { BTS, "bts" },
+ { CALL, "call" },
+ { CALLF, "callf" },
+ { CBW, "cbw" },
+ { CLC, "clc" },
+ { CLD, "cld" },
+ { CLI, "cli" },
+ { CLTS, "clts" },
+ { CMC, "cmc" },
+ { CMP, "cmp%" },
+ { CMPS, "cmps%" },
+ { CMPXCHG, "cmpxchg" },
+ { CWD, "cwd" },
+ { DAA, "daa" },
+ { DAS, "das" },
+ { DEC, "dec%" },
+ { DIV, "div%" },
+ { DOT_ALIGN, ".align" },
+ { DOT_ASCII, ".ascii" },
+ { DOT_ASCIZ, ".asciz" },
+ { DOT_ASSERT, ".assert" },
+ { DOT_BASE, ".base" },
+ { DOT_BSS, ".sect .bss" },
+ { DOT_COMM, ".comm" },
+ { DOT_DATA, ".sect .data" },
+ { DOT_DATA1, ".data1" },
+ { DOT_DATA2, ".data2" },
+ { DOT_DATA4, ".data4" },
+ { DOT_DEFINE, ".define" },
+ { DOT_END, ".sect .end" },
+ { DOT_EXTERN, ".extern" },
+ { DOT_FILE, ".file" },
+ { DOT_LCOMM, ".comm" },
+ { DOT_LINE, ".line" },
+ { DOT_LIST, ".list" },
+ { DOT_NOLIST, ".nolist" },
+ { DOT_ROM, ".sect .rom" },
+ { DOT_SPACE, ".space" },
+ { DOT_SYMB, ".symb" },
+ { DOT_TEXT, ".sect .text" },
+ { DOT_USE16, ".use16" },
+ { DOT_USE32, ".use32" },
+ { ENTER, "enter" },
+ { F2XM1, "f2xm1" },
+ { FABS, "fabs" },
+ { FADD, "fadd" },
+ { FADDD, "faddd" },
+ { FADDP, "faddp" },
+ { FADDS, "fadds" },
+ { FBLD, "fbld" },
+ { FBSTP, "fbstp" },
+ { FCHS, "fchs" },
+ { FCLEX, "fclex" },
+ { FCOMD, "fcomd" },
+ { FCOMPD, "fcompd" },
+ { FCOMPP, "fcompp" },
+ { FCOMPS, "fcomps" },
+ { FCOMS, "fcoms" },
+ { FCOS, "fcos" },
+ { FDECSTP, "fdecstp" },
+ { FDIVD, "fdivd" },
+ { FDIVP, "fdivp" },
+ { FDIVRD, "fdivrd" },
+ { FDIVRP, "fdivrp" },
+ { FDIVRS, "fdivrs" },
+ { FDIVS, "fdivs" },
+ { FFREE, "ffree" },
+ { FIADDL, "fiaddl" },
+ { FIADDS, "fiadds" },
+ { FICOM, "ficom" },
+ { FICOMP, "ficomp" },
+ { FIDIVL, "fidivl" },
+ { FIDIVRL, "fidivrl" },
+ { FIDIVRS, "fidivrs" },
+ { FIDIVS, "fidivs" },
+ { FILDL, "fildl" },
+ { FILDQ, "fildq" },
+ { FILDS, "filds" },
+ { FIMULL, "fimull" },
+ { FIMULS, "fimuls" },
+ { FINCSTP, "fincstp" },
+ { FINIT, "finit" },
+ { FISTL, "fistl" },
+ { FISTP, "fistp" },
+ { FISTS, "fists" },
+ { FISUBL, "fisubl" },
+ { FISUBRL, "fisubrl" },
+ { FISUBRS, "fisubrs" },
+ { FISUBS, "fisubs" },
+ { FLD1, "fld1" },
+ { FLDCW, "fldcw" },
+ { FLDD, "fldd" },
+ { FLDENV, "fldenv" },
+ { FLDL2E, "fldl2e" },
+ { FLDL2T, "fldl2t" },
+ { FLDLG2, "fldlg2" },
+ { FLDLN2, "fldln2" },
+ { FLDPI, "fldpi" },
+ { FLDS, "flds" },
+ { FLDX, "fldx" },
+ { FLDZ, "fldz" },
+ { FMULD, "fmuld" },
+ { FMULP, "fmulp" },
+ { FMULS, "fmuls" },
+ { FNOP, "fnop" },
+ { FPATAN, "fpatan" },
+ { FPREM, "fprem" },
+ { FPREM1, "fprem1" },
+ { FPTAN, "fptan" },
+ { FRNDINT, "frndint" },
+ { FRSTOR, "frstor" },
+ { FSAVE, "fsave" },
+ { FSCALE, "fscale" },
+ { FSIN, "fsin" },
+ { FSINCOS, "fsincos" },
+ { FSQRT, "fsqrt" },
+ { FSTCW, "fstcw" },
+ { FSTD, "fstd" },
+ { FSTENV, "fstenv" },
+ { FSTPD, "fstpd" },
+ { FSTPS, "fstps" },
+ { FSTPX, "fstpx" },
+ { FSTS, "fsts" },
+ { FSTSW, "fstsw" },
+ { FSUBD, "fsubd" },
+ { FSUBP, "fsubp" },
+ { FSUBPR, "fsubpr" },
+ { FSUBRD, "fsubrd" },
+ { FSUBRS, "fsubrs" },
+ { FSUBS, "fsubs" },
+ { FTST, "ftst" },
+ { FUCOM, "fucom" },
+ { FUCOMP, "fucomp" },
+ { FUCOMPP, "fucompp" },
+ { FXAM, "fxam" },
+ { FXCH, "fxch" },
+ { FXTRACT, "fxtract" },
+ { FYL2X, "fyl2x" },
+ { FYL2XP1, "fyl2xp1" },
+ { HLT, "hlt" },
+ { IDIV, "idiv%" },
+ { IMUL, "imul%" },
+ { IN, "in%" },
+ { INC, "inc%" },
+ { INS, "ins%" },
+ { INT, "int" },
+ { INTO, "into" },
+ { INVD, "invd" },
+ { INVLPG, "invlpg" },
+ { IRET, "iret" },
+ { IRETD, "iretd" },
+ { JA, "ja" },
+ { JAE, "jae" },
+ { JB, "jb" },
+ { JBE, "jbe" },
+ { JCXZ, "jcxz" },
+ { JE, "je" },
+ { JG, "jg" },
+ { JGE, "jge" },
+ { JL, "jl" },
+ { JLE, "jle" },
+ { JMP, "jmp" },
+ { JMPF, "jmpf" },
+ { JNE, "jne" },
+ { JNO, "jno" },
+ { JNP, "jnp" },
+ { JNS, "jns" },
+ { JO, "jo" },
+ { JP, "jp" },
+ { JS, "js" },
+ { LAHF, "lahf" },
+ { LAR, "lar" },
+ { LDS, "lds" },
+ { LEA, "lea" },
+ { LEAVE, "leave" },
+ { LES, "les" },
+ { LFS, "lfs" },
+ { LGDT, "lgdt" },
+ { LGS, "lgs" },
+ { LIDT, "lidt" },
+ { LLDT, "lldt" },
+ { LMSW, "lmsw" },
+ { LOCK, "lock" },
+ { LODS, "lods%" },
+ { LOOP, "loop" },
+ { LOOPE, "loope" },
+ { LOOPNE, "loopne" },
+ { LSL, "lsl" },
+ { LSS, "lss" },
+ { LTR, "ltr" },
+ { MOV, "mov%" },
+ { MOVS, "movs%" },
+ { MOVSX, "movsx" },
+ { MOVSXB, "movsxb" },
+ { MOVZX, "movzx" },
+ { MOVZXB, "movzxb" },
+ { MUL, "mul%" },
+ { NEG, "neg%" },
+ { NOP, "nop" },
+ { NOT, "not%" },
+ { OR, "or%" },
+ { OUT, "out%" },
+ { OUTS, "outs%" },
+ { POP, "pop" },
+ { POPA, "popa" },
+ { POPAD, "popad" },
+ { POPF, "popf" },
+ { PUSH, "push" },
+ { PUSHA, "pusha" },
+ { PUSHAD, "pushad" },
+ { PUSHF, "pushf" },
+ { RCL, "rcl%" },
+ { RCR, "rcr%" },
+ { RET, "ret" },
+ { RETF, "retf" },
+ { ROL, "rol%" },
+ { ROR, "ror%" },
+ { SAHF, "sahf" },
+ { SAL, "sal%" },
+ { SAR, "sar%" },
+ { SBB, "sbb%" },
+ { SCAS, "scas%" },
+ { SETA, "seta" },
+ { SETAE, "setae" },
+ { SETB, "setb" },
+ { SETBE, "setbe" },
+ { SETE, "sete" },
+ { SETG, "setg" },
+ { SETGE, "setge" },
+ { SETL, "setl" },
+ { SETLE, "setle" },
+ { SETNE, "setne" },
+ { SETNO, "setno" },
+ { SETNP, "setnp" },
+ { SETNS, "setns" },
+ { SETO, "seto" },
+ { SETP, "setp" },
+ { SETS, "sets" },
+ { SGDT, "sgdt" },
+ { SHL, "shl%" },
+ { SHLD, "shld" },
+ { SHR, "shr%" },
+ { SHRD, "shrd" },
+ { SIDT, "sidt" },
+ { SLDT, "sldt" },
+ { SMSW, "smsw" },
+ { STC, "stc" },
+ { STD, "std" },
+ { STI, "sti" },
+ { STOS, "stos%" },
+ { STR, "str" },
+ { SUB, "sub%" },
+ { TEST, "test%" },
+ { VERR, "verr" },
+ { VERW, "verw" },
+ { WAIT, "wait" },
+ { WBINVD, "wbinvd" },
+ { XADD, "xadd" },
+ { XCHG, "xchg%" },
+ { XLAT, "xlat" },
+ { XOR, "xor%" },
+};
+
+/* True for the far jump and far call opcodes. Their operand pair is joined
+ * with ':' instead of ", " in the ACK dialect and swapped in the NCC dialect.
+ */
+#define farjmp(o) ((o) == JMPF || (o) == CALLF)
+
+static FILE *ef; /* Output stream. */
+static long eline= 1; /* Source line number the output is currently at. */
+static char *efile; /* Source file name the output currently corresponds to. */
+static char *orig_efile; /* Output file name, passed to fatal() on write errors. */
+static char *opcode2name_tab[N_OPCODES]; /* Opcode -> mnemonic, filled by ack_emit_init(). */
+static enum dialect { ACK, NCC } dialect= ACK; /* Output dialect; ncc_emit_init() selects NCC. */
+
+static void ack_putchar(int c)
+/* Write a single character to the output file. The result of putc() is
+ * checked: a failed write is reported through fatal() with the name of the
+ * output file.
+ */
+{
+	int rc;
+
+	rc = putc(c, ef);
+	if (rc == EOF) {
+		fatal(orig_efile);
+	}
+}
+
+static void ack_printf(const char *fmt, ...)
+/* Formatted output to the output file, printf() style. A failed write is
+ * reported through fatal() with the name of the output file.
+ */
+{
+	va_list ap;
+	int rc;
+
+	va_start(ap, fmt);
+	rc = vfprintf(ef, fmt, ap);
+	va_end(ap);
+
+	/* vfprintf() signals an error with any negative value, which need not
+	 * be equal to EOF.
+	 */
+	if (rc < 0) fatal(orig_efile);
+}
+
+void ack_emit_init(char *file, const char *banner)
+/* Set up the emitter for an ACK assembly file: open the output (standard
+ * output if 'file' is nil), write the banner as a '!' comment, declare the
+ * sections for the ACK dialect and fill the opcode to mnemonic table.
+ */
+{
+	mnemonic_t *entry;
+
+	if (file != nil) {
+		ef = fopen(file, "w");
+		if (ef == nil) fatal(file);
+	} else {
+		file = "stdout";
+		ef = stdout;
+	}
+	efile = file;
+	orig_efile = file;
+
+	ack_printf("! %s", banner);
+	if (dialect == ACK) {
+		/* Declare the four sections used under Minix. */
+		ack_printf(
+		"\n.sect .text; .sect .rom; .sect .data; .sect .bss\n.sect .text");
+	}
+
+	/* Fill opcode2name_tab[]; every opcode may be listed only once. */
+	entry = mnemtab;
+	while (entry < arraylimit(mnemtab)) {
+		assert(opcode2name_tab[entry->opcode] == nil);
+		opcode2name_tab[entry->opcode] = entry->name;
+		entry++;
+	}
+}
+
+/* Mnemonic for an opcode, nil if the table has no entry for it. The "+ 0"
+ * turns the array element into a plain value that cannot be assigned to.
+ */
+#define opcode2name(op) (opcode2name_tab[op] + 0)
+
+static void ack_put_string(const char *s, size_t n)
+/* Emit the n bytes at s as the inside of a string literal. Control
+ * characters, DEL and bytes above 0177 are written as three digit octal
+ * escapes, '"' and '\' get a backslash in front, anything else is copied.
+ */
+{
+	size_t i;
+
+	for (i = 0; i < n; i++) {
+		/* Go through unsigned char so that the range test does not
+		 * depend on the signedness of plain char.
+		 */
+		int c = (unsigned char) s[i];
+
+		if (c < ' ' || c >= 0177) {
+			/* DEL (0177) is not printable either. */
+			ack_printf("\\%03o", c);
+		} else if (c == '"' || c == '\\') {
+			ack_printf("\\%c", c);
+		} else {
+			ack_putchar(c);
+		}
+	}
+}
+
+static void ack_put_expression(asm86_t *a, expression_t *e, int deref)
+/* Send an expression, i.e. instruction operands, to the output file. Deref
+ * is true when the rewrite for the ncc dialect may be made.
+ *
+ * The expression tree is walked recursively, dispatching on e->operator.
+ * With deref set, jump operands that go through memory or a register get
+ * an '@' or parentheses, and non-jump constant expressions get a '#' in
+ * front. Names for which syms_is_global() is true are printed with a
+ * leading '_'. An unknown operator terminates the program.
+ */
+{
+ int isglob;
+
+ assert(e != nil);
+
+ /* NOTE(review): evaluated for every node, also for operator nodes that
+ * may carry no name -- presumably syms_is_global() copes with that;
+ * confirm against its definition.
+ */
+ isglob = syms_is_global(e->name);
+
+ switch (e->operator) {
+ case ',':
+ /* Operand pair. */
+ if (dialect == NCC && farjmp(a->opcode)) {
+ /* ACK jmpf seg:off -> NCC jmpf off,seg */
+ ack_put_expression(a, e->right, deref);
+ ack_printf(", ");
+ ack_put_expression(a, e->left, deref);
+ } else {
+ ack_put_expression(a, e->left, deref);
+ ack_printf(farjmp(a->opcode) ? ":" : ", ");
+ ack_put_expression(a, e->right, deref);
+ }
+ break;
+ case 'O':
+ /* Up to three parts printed back to back: left, middle, right. */
+ if (deref && a->optype == JUMP) ack_putchar('@');
+ if (e->left != nil) ack_put_expression(a, e->left, 0);
+ if (e->middle != nil) ack_put_expression(a, e->middle, 0);
+ if (e->right != nil) ack_put_expression(a, e->right, 0);
+ break;
+ case '(':
+ /* Parenthesized operand; the parentheses are left out when deref is set. */
+ if (deref && a->optype == JUMP) ack_putchar('@');
+ if (!deref) ack_putchar('(');
+ ack_put_expression(a, e->middle, 0);
+ if (!deref) ack_putchar(')');
+ break;
+ case 'B':
+ /* Name printed between parentheses. */
+ ack_printf("(%s%s)", isglob ? "_" : "", e->name);
+ break;
+ case '1':
+ case '2':
+ case '4':
+ case '8':
+ /* Name with scale factor: "(name*scale)"; the "*1" is left out when use16() is true. */
+ ack_printf((use16() && e->operator == '1')
+ ? "(%s%s)" : "(%s%s*%c)", isglob ? "_" : "",
+ e->name, e->operator);
+ break;
+ case '+':
+ case '-':
+ case '~':
+ /* Unary form if there is a middle operand, otherwise binary. */
+ if (e->middle != nil) {
+ if (deref && a->optype != JUMP) ack_putchar('#');
+ ack_putchar(e->operator);
+ ack_put_expression(a, e->middle, 0);
+ break;
+ }
+ /*FALL THROUGH*/
+ case '*':
+ case '/':
+ case '%':
+ case '&':
+ case '|':
+ case '^':
+ case S_LEFTSHIFT:
+ case S_RIGHTSHIFT:
+ /* Binary operator: left operand, operator text, right operand. */
+ if (deref && a->optype != JUMP) ack_putchar('#');
+ ack_put_expression(a, e->left, 0);
+ if (e->operator == S_LEFTSHIFT) {
+ ack_printf("<<");
+ } else
+ if (e->operator == S_RIGHTSHIFT) {
+ ack_printf(">>");
+ } else {
+ ack_putchar(e->operator);
+ }
+ ack_put_expression(a, e->right, 0);
+ break;
+ case '[':
+ /* Grouping, printed with square brackets. */
+ if (deref && a->optype != JUMP) ack_putchar('#');
+ ack_putchar('[');
+ ack_put_expression(a, e->middle, 0);
+ ack_putchar(']');
+ break;
+ case 'W':
+ /* A word: a register name or some other name/number. */
+ if (deref && a->optype == JUMP && isregister(e->name))
+ {
+ /* Jump through a register is written as "(reg)". */
+ ack_printf("(%s)", e->name);
+ break;
+ }
+ if (deref && a->optype != JUMP && !isregister(e->name)) {
+ ack_putchar('#');
+ }
+ ack_printf("%s%s", isglob ? "_" : "", e->name);
+ break;
+ case 'S':
+ /* String, e->len bytes long, printed between double quotes. */
+ ack_putchar('"');
+ ack_put_string(e->name, e->len);
+ ack_putchar('"');
+ break;
+ default:
+ fprintf(stderr,
+ "asmconv: internal error, unknown expression operator '%d'\n",
+ e->operator);
+ exit(EXIT_FAILURE);
+ }
+}
+
+void ack_emit_instruction(asm86_t *a)
+/* Output one instruction and its operands. A nil 'a' marks the last call
+ * and only terminates the current output line.
+ *
+ * The output is kept in step with the line numbers of the source: small
+ * gaps are filled with empty lines, anything else gets a '! line "file"'
+ * marker. An instruction on the same source line as the previous one is
+ * joined to it with "; ", unless the previous one was a label. Comments,
+ * preprocessor lines and unknown instructions are passed through as raw
+ * text. For known opcodes the prefixes, the mnemonic and the operands are
+ * printed; the operand pair is swapped in place since the input has its
+ * operands in GNU order. An opcode without a mnemonic terminates the
+ * program.
+ */
+{
+	int same= 0;		/* joined to the previous instruction with "; " */
+	char *p;
+	int deref;
+	static int prevop;	/* DOT_LABEL if the previous call was a label */
+
+	if (a == nil) {
+		/* Last call */
+		ack_putchar('\n');
+		return;
+	}
+
+	/* Make sure the line number of the line to be emitted is ok. */
+	if ((a->file != efile && strcmp(a->file, efile) != 0)
+			|| a->line < eline || a->line > eline+10) {
+		ack_putchar('\n');
+		ack_printf("! %ld \"%s\"\n", a->line, a->file);
+		efile= a->file;
+		eline= a->line;
+	} else {
+		if (a->line == eline) {
+			if (prevop == DOT_LABEL) {
+				ack_printf("\n");
+				same= 0;
+			} else {
+				ack_printf("; ");
+				same= 1;
+			}
+		}
+		while (eline < a->line) {
+			ack_putchar('\n');
+			eline++;
+		}
+	}
+
+	prevop= (a->opcode == DOT_LABEL) ? DOT_LABEL : 0;
+
+	if (a->opcode == COMMENT
+			|| a->opcode == C_PREPROCESSOR
+			|| a->opcode == UNKNOWN) {
+		/* Raw text. A comment gets '!' as its first character unless
+		 * it starts with a '/'.
+		 */
+		if (a->opcode == COMMENT && a->raw_string[0] != '/') {
+			a->raw_string[0] = '!';
+		}
+		ack_printf("%s", a->raw_string);
+		return;
+	} else
+	if (a->opcode == DOT_LABEL) {
+		assert(a->args->operator == ':');
+		if (syms_is_global(a->args->name))
+			ack_printf("_%s:", a->args->name);
+		else
+			ack_printf("%s:", a->args->name);
+	} else
+	if (a->opcode == DOT_EQU) {
+		assert(a->args->operator == '=');
+		ack_printf("\t%s = ", a->args->name);
+		ack_put_expression(a, a->args->middle, 0);
+	} else
+	if ((p= opcode2name(a->opcode)) != nil) {
+		if (!is_pseudo(a->opcode) && !same) ack_putchar('\t');
+
+		/* Repeat prefix. */
+		switch (a->rep) {
+		case ONCE:	break;
+		case REP:	ack_printf("rep");	break;
+		case REPE:	ack_printf("repe");	break;
+		case REPNE:	ack_printf("repne");	break;
+		default:	assert(0);
+		}
+		if (a->rep != ONCE) {
+			ack_printf(dialect == ACK ? " " : "; ");
+		}
+
+		/* Segment override prefix. */
+		switch (a->seg) {
+		case DEFSEG:	break;
+		case CSEG:	ack_printf("cseg");	break;
+		case DSEG:	ack_printf("dseg");	break;
+		case ESEG:	ack_printf("eseg");	break;
+		case FSEG:	ack_printf("fseg");	break;
+		case GSEG:	ack_printf("gseg");	break;
+		case SSEG:	ack_printf("sseg");	break;
+		default:	assert(0);
+		}
+		if (a->seg != DEFSEG) {
+			ack_printf(dialect == ACK ? " " : "; ");
+		}
+
+		/* Operand and address size prefixes. */
+		if (a->oaz & OPZ) ack_printf(use16() ? "o32 " : "o16 ");
+		if (a->oaz & ADZ) ack_printf(use16() ? "a32 " : "a16 ");
+
+		/* The name of these two depends on the operand size. */
+		if (a->opcode == CBW) {
+			p= !(a->oaz & OPZ) == use16() ? "cbw" : "cwde";
+		}
+
+		if (a->opcode == CWD) {
+			p= !(a->oaz & OPZ) == use16() ? "cwd" : "cdq";
+		}
+
+		/* A .comm is preceded by a .define of the same symbol. */
+		if (a->opcode == DOT_COMM && a->args != nil
+			&& a->args->operator == ','
+			&& a->args->left->operator == 'W'
+		) {
+			if (syms_is_global(a->args->left->name))
+				ack_printf(".define\t_%s; ", a->args->left->name);
+			else
+				ack_printf(".define\t%s; ", a->args->left->name);
+		}
+#define IS_OP_CR4(r) ((r) && (r)->name && strcmp((r)->name, "cr4") == 0)
+		/* unsupported mov to/from cr4; the instruction bytes are
+		 * emitted as data instead. A mov without operands must not
+		 * be dereferenced here.
+		 */
+		if (a->opcode == MOV && a->args != nil
+			&& a->args->operator == ','
+		) {
+			if (IS_OP_CR4(a->args->left)) {
+				/* read cr4 */
+				ack_printf(".data1 0x0f, 0x20, 0xe0\n");
+				return;
+			}
+			if (IS_OP_CR4(a->args->right)) {
+				/* write cr4 */
+				ack_printf(".data1 0x0f, 0x22, 0xe0\n");
+				return;
+			}
+		}
+		/* we are translating from GNU */
+		if (a->args && a->args->operator == ','
+			/* don't swap ljmp prefixed with segment */
+			&& a->opcode != JMPF) {
+			expression_t * tmp;
+
+			tmp = a->args->right;
+			a->args->right = a->args->left;
+			a->args->left = tmp;
+		}
+
+		/* The mnemonic; '%' becomes 'b' for byte sized operands. */
+		while (*p != 0) {
+			if (*p == '%') {
+				if (a->optype == BYTE) ack_putchar('b');
+			} else {
+				ack_putchar(*p);
+			}
+			p++;
+		}
+		/*
+		 * gnu assembly expresses the ES segment in the long instruction
+		 * format. Not necessary in ack
+		 */
+		if (a->opcode == MOVS)
+			return;
+		if (a->args != nil) {
+			ack_putchar('\t');
+			switch (a->opcode) {
+			case IN:
+			case OUT:
+			case INT:
+				deref= 0;
+				break;
+			default:
+				deref= (dialect == NCC && a->optype != PSEUDO);
+			}
+			ack_put_expression(a, a->args, deref);
+		}
+		if (a->opcode == DOT_USE16) set_use16();
+		if (a->opcode == DOT_USE32) set_use32();
+	} else {
+		fprintf(stderr,
+			"asmconv: internal error, unknown opcode '%d'\n",
+			a->opcode);
+		exit(EXIT_FAILURE);
+	}
+
+	/*
+	 * comment on the same line as an instruction. Cannot be definition of a
+	 * macro
+	 * */
+	if (a->raw_string) {
+		if (a->raw_string[0] != '/')
+			a->raw_string[0] = '!';
+		ack_printf("\t%s", a->raw_string);
+	}
+}
+
+/* A few ncc mnemonics are different: the section pseudo-ops are written
+ * without the ".sect" keyword. ncc_emit_init() uses these entries to
+ * overwrite the matching slots of opcode2name_tab[].
+ */
+static mnemonic_t ncc_mnemtab[] = {
+ { DOT_BSS, ".bss" },
+ { DOT_DATA, ".data" },
+ { DOT_END, ".end" },
+ { DOT_ROM, ".rom" },
+ { DOT_TEXT, ".text" },
+};
+
+void ncc_emit_init(char *file, const char *banner)
+/* Prepare producing an assembly file in the ncc dialect, i.e. the assembly
+ * of the Minix ACK ANSI C compiler for the 8086. That dialect differs from
+ * normal ACK assembly and from the old K&R assembler. The dialect is
+ * selected first, then the common ACK setup is run and finally the few
+ * mnemonics that differ are replaced.
+ */
+{
+	mnemonic_t *entry;
+
+	dialect = NCC;
+	ack_emit_init(file, banner);
+
+	/* Overwrite the table slots of the mnemonics that differ. */
+	entry = ncc_mnemtab;
+	while (entry < arraylimit(ncc_mnemtab)) {
+		opcode2name_tab[entry->opcode] = entry->name;
+		entry++;
+	}
+}
+
+void ncc_emit_instruction(asm86_t *a)
+/* Output one instruction in the ncc dialect. All the work is done by
+ * ack_emit_instruction(), which looks at 'dialect' where the output differs.
+ */
+{
+ ack_emit_instruction(a);
+}
--- /dev/null
+/* parse_ack.c - parse GNU assembly Author: R.S. Veldema
+ * <rveldema@cs.vu.nl>
+ * 26 Aug 1996
+ */
+#define nil 0
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <limits.h>
+#include <assert.h>
+#include "asmconv.h"
+#include "token.h"
+#include "asm86.h"
+#include "languages.h"
+#include "globals.h"
+
+typedef struct mnemonic { /* GNU as86 mnemonics translation table. */
+ char *name; /* Mnemonic as written in the source; the key search_mnem() compares against. */
+ opcode_t opcode; /* Internal opcode the mnemonic stands for. */
+ optype_t optype; /* Operand type that goes with this spelling (the table uses BYTE, WORD, OWORD, JUMP, JUMP16 and PSEUDO). */
+} mnemonic_t;
+
+/* GNU mnemonic to opcode table. search_mnem() does a binary search with
+ * strcmp() on it, so the entries must stay in strcmp() order. For sized
+ * instructions the 'b' suffix maps to BYTE, the 'l' suffix to WORD and the
+ * 'w' suffix to OWORD.
+ */
+static mnemonic_t mnemtab[] = { /* This array is sorted. */
+ { ".align", DOT_ALIGN, PSEUDO },
+ { ".ascii", DOT_ASCII, PSEUDO },
+ { ".asciz", DOT_ASCIZ, PSEUDO },
+ { ".assert", DOT_ASSERT, PSEUDO },
+ { ".balign", DOT_ALIGN, PSEUDO },
+ { ".base", DOT_BASE, PSEUDO },
+ { ".bss", DOT_BSS, PSEUDO },
+ { ".byte", DOT_DATA1, PSEUDO },
+ { ".comm", DOT_COMM, PSEUDO },
+ { ".data", DOT_DATA, PSEUDO },
+ { ".data1", DOT_DATA1, PSEUDO },
+ { ".data2", DOT_DATA2, PSEUDO },
+ { ".data4", DOT_DATA4, PSEUDO },
+ { ".end", DOT_END, PSEUDO },
+ { ".extern", DOT_EXTERN, PSEUDO },
+ { ".file", DOT_FILE, PSEUDO },
+ { ".global", DOT_DEFINE, PSEUDO },
+ { ".globl", DOT_DEFINE, PSEUDO },
+ { ".lcomm", DOT_LCOMM, PSEUDO },
+ { ".line", DOT_LINE, PSEUDO },
+ { ".list", DOT_LIST, PSEUDO },
+ { ".long", DOT_DATA4, PSEUDO },
+ { ".nolist", DOT_NOLIST, PSEUDO },
+ { ".rom", DOT_ROM, PSEUDO },
+ { ".short", DOT_DATA2, PSEUDO },
+ { ".space", DOT_SPACE, PSEUDO },
+ { ".symb", DOT_SYMB, PSEUDO },
+ { ".text", DOT_TEXT, PSEUDO },
+ { ".word", DOT_DATA2, PSEUDO },
+ { "aaa", AAA, WORD },
+ { "aad", AAD, WORD },
+ { "aam", AAM, WORD },
+ { "aas", AAS, WORD },
+ { "adcb", ADC, BYTE },
+ { "adcl", ADC, WORD },
+ { "adcw", ADC, OWORD },
+ { "addb", ADD, BYTE },
+ { "addl", ADD, WORD },
+ { "addw", ADD, OWORD },
+ { "andb", AND, BYTE },
+ { "andl", AND, WORD },
+ { "andw", AND, OWORD },
+ { "arpl", ARPL, WORD },
+ { "bound", BOUND, WORD },
+ { "bsf", BSF, WORD },
+ { "bsr", BSR, WORD },
+ { "bswap", BSWAP, WORD },
+ { "btc", BTC, WORD },
+ { "btl", BT, WORD },
+ { "btr", BTR, WORD },
+ { "bts", BTS, WORD },
+ { "btw", BT, OWORD },
+ { "call", CALL, JUMP },
+ { "callf", CALLF, JUMP },
+ { "cbtw", CBW, OWORD },
+ { "cbw", CBW, WORD },
+ { "cdq", CWD, WORD },
+ { "clc", CLC, WORD },
+ { "cld", CLD, WORD },
+ { "cli", CLI, WORD },
+ { "cltd", CWD, WORD },
+ { "clts", CLTS, WORD },
+ { "cmc", CMC, WORD },
+ { "cmpb", CMP, BYTE },
+ { "cmpl", CMP, WORD },
+ { "cmps", CMPS, WORD },
+ { "cmpsb", CMPS, BYTE },
+ { "cmpsl", CMPS, WORD },
+ { "cmpsw", CMPS, OWORD },
+ { "cmpw", CMP, OWORD },
+ { "cmpxchg", CMPXCHG, WORD },
+ { "cpuid", CPUID, WORD },
+ { "cwd", CWD, WORD },
+ { "cwde", CBW, WORD },
+ { "cwtd", CWD, OWORD },
+ { "cwtl", CBW, WORD },
+ { "daa", DAA, WORD },
+ { "das", DAS, WORD },
+ { "decb", DEC, BYTE },
+ { "decl", DEC, WORD },
+ { "decw", DEC, OWORD },
+ { "divb", DIV, BYTE },
+ { "divl", DIV, WORD },
+ { "divw", DIV, OWORD },
+ { "enter", ENTER, WORD },
+ { "f2xm1", F2XM1, WORD },
+ { "fabs", FABS, WORD },
+ { "fadd", FADD, WORD },
+ { "faddd", FADDD, WORD },
+ { "faddp", FADDP, WORD },
+ { "fadds", FADDS, WORD },
+ { "fbld", FBLD, WORD },
+ { "fbstp", FBSTP, WORD },
+ { "fchs", FCHS, WORD },
+ { "fcomd", FCOMD, WORD },
+ { "fcompd", FCOMPD, WORD },
+ { "fcompp", FCOMPP, WORD },
+ { "fcomps", FCOMPS, WORD },
+ { "fcoms", FCOMS, WORD },
+ { "fcos", FCOS, WORD },
+ { "fdecstp", FDECSTP, WORD },
+ { "fdivd", FDIVD, WORD },
+ { "fdivp", FDIVP, WORD },
+ { "fdivrd", FDIVRD, WORD },
+ { "fdivrp", FDIVRP, WORD },
+ { "fdivrs", FDIVRS, WORD },
+ { "fdivs", FDIVS, WORD },
+ { "ffree", FFREE, WORD },
+ { "fiaddl", FIADDL, WORD },
+ { "fiadds", FIADDS, WORD },
+ { "ficom", FICOM, WORD },
+ { "ficomp", FICOMP, WORD },
+ { "fidivl", FIDIVL, WORD },
+ { "fidivrl", FIDIVRL, WORD },
+ { "fidivrs", FIDIVRS, WORD },
+ { "fidivs", FIDIVS, WORD },
+ { "fildl", FILDL, WORD },
+ { "fildq", FILDQ, WORD },
+ { "filds", FILDS, WORD },
+ { "fimull", FIMULL, WORD },
+ { "fimuls", FIMULS, WORD },
+ { "fincstp", FINCSTP, WORD },
+ { "fistl", FISTL, WORD },
+ { "fistp", FISTP, WORD },
+ { "fists", FISTS, WORD },
+ { "fisubl", FISUBL, WORD },
+ { "fisubrl", FISUBRL, WORD },
+ { "fisubrs", FISUBRS, WORD },
+ { "fisubs", FISUBS, WORD },
+ { "fld1", FLD1, WORD },
+ { "fldcw", FLDCW, WORD },
+ { "fldd", FLDD, WORD },
+ { "fldenv", FLDENV, WORD },
+ { "fldl2e", FLDL2E, WORD },
+ { "fldl2t", FLDL2T, WORD },
+ { "fldlg2", FLDLG2, WORD },
+ { "fldln2", FLDLN2, WORD },
+ { "fldpi", FLDPI, WORD },
+ { "flds", FLDS, WORD },
+ { "fldx", FLDX, WORD },
+ { "fldz", FLDZ, WORD },
+ { "fmuld", FMULD, WORD },
+ { "fmulp", FMULP, WORD },
+ { "fmuls", FMULS, WORD },
+ { "fnclex", FCLEX, WORD },
+ { "fninit", FINIT, WORD },
+ { "fnop", FNOP, WORD },
+ { "fnsave", FSAVE, WORD },
+ { "fnstcw", FSTCW, WORD },
+ { "fnstenv", FSTENV, WORD },
+ { "fpatan", FPATAN, WORD },
+ { "fprem", FPREM, WORD },
+ { "fprem1", FPREM1, WORD },
+ { "fptan", FPTAN, WORD },
+ { "frndint", FRNDINT, WORD },
+ { "frstor", FRSTOR, WORD },
+ { "fscale", FSCALE, WORD },
+ { "fsin", FSIN, WORD },
+ { "fsincos", FSINCOS, WORD },
+ { "fsqrt", FSQRT, WORD },
+ { "fstd", FSTD, WORD },
+ { "fstpd", FSTPD, WORD },
+ { "fstps", FSTPS, WORD },
+ { "fstpx", FSTPX, WORD },
+ { "fsts", FSTS, WORD },
+ { "fstsw", FSTSW, WORD },
+ { "fsubd", FSUBD, WORD },
+ { "fsubp", FSUBP, WORD },
+ { "fsubpr", FSUBPR, WORD },
+ { "fsubrd", FSUBRD, WORD },
+ { "fsubrs", FSUBRS, WORD },
+ { "fsubs", FSUBS, WORD },
+ { "ftst", FTST, WORD },
+ { "fucom", FUCOM, WORD },
+ { "fucomp", FUCOMP, WORD },
+ { "fucompp", FUCOMPP, WORD },
+ { "fxam", FXAM, WORD },
+ { "fxch", FXCH, WORD },
+ { "fxtract", FXTRACT, WORD },
+ { "fyl2x", FYL2X, WORD },
+ { "fyl2xp1", FYL2XP1, WORD },
+ { "hlt", HLT, WORD },
+ { "idivb", IDIV, BYTE },
+ { "idivl", IDIV, WORD },
+ { "idivw", IDIV, OWORD },
+ { "imulb", IMUL, BYTE },
+ { "imull", IMUL, WORD },
+ { "imulw", IMUL, OWORD },
+ { "inb", IN, BYTE },
+ { "incb", INC, BYTE },
+ { "incl", INC, WORD },
+ { "incw", INC, OWORD },
+ { "inl", IN, WORD },
+ { "insb", INS, BYTE },
+ { "insl", INS, WORD },
+ { "insw", INS, OWORD },
+ { "int", INT, WORD },
+ { "into", INTO, JUMP },
+ { "invd", INVD, WORD },
+ { "invlpg", INVLPG, WORD },
+ { "inw", IN, OWORD },
+ { "iret", IRET, JUMP },
+ { "iretd", IRETD, JUMP },
+ { "ja", JA, JUMP },
+ { "jae", JAE, JUMP },
+ { "jb", JB, JUMP },
+ { "jbe", JBE, JUMP },
+ { "jc", JB, JUMP },
+ { "jcxz", JCXZ, JUMP },
+ { "je", JE, JUMP },
+ { "jecxz", JCXZ, JUMP },
+ { "jg", JG, JUMP },
+ { "jge", JGE, JUMP },
+ { "jl", JL, JUMP },
+ { "jle", JLE, JUMP },
+ { "jmp", JMP, JUMP },
+ { "jmpf", JMPF, JUMP },
+ { "jna", JBE, JUMP },
+ { "jnae", JB, JUMP },
+ { "jnb", JAE, JUMP },
+ { "jnbe", JA, JUMP },
+ { "jnc", JAE, JUMP },
+ { "jne", JNE, JUMP },
+ { "jng", JLE, JUMP },
+ { "jnge", JL, JUMP },
+ { "jnl", JGE, JUMP },
+ { "jnle", JG, JUMP },
+ { "jno", JNO, JUMP },
+ { "jnp", JNP, JUMP },
+ { "jns", JNS, JUMP },
+ { "jnz", JNE, JUMP },
+ { "jo", JO, JUMP },
+ { "jp", JP, JUMP },
+ { "js", JS, JUMP },
+ { "jz", JE, JUMP },
+ { "lahf", LAHF, WORD },
+ { "lar", LAR, WORD },
+ { "lds", LDS, WORD },
+ { "leal", LEA, WORD },
+ { "leave", LEAVE, WORD },
+ { "leaw", LEA, OWORD },
+ { "les", LES, WORD },
+ { "lfs", LFS, WORD },
+ { "lgdt", LGDT, WORD },
+ { "lgs", LGS, WORD },
+ { "lidt", LIDT, WORD },
+ { "ljmp", JMPF, JUMP },
+ { "ljmpw", JMPF, JUMP16 },
+ { "lldt", LLDT, WORD },
+ { "lmsw", LMSW, WORD },
+ { "lock", LOCK, WORD },
+ { "lods", LODS, WORD },
+ { "lodsb", LODS, BYTE },
+ { "loop", LOOP, JUMP },
+ { "loope", LOOPE, JUMP },
+ { "loopne", LOOPNE, JUMP },
+ { "loopnz", LOOPNE, JUMP },
+ { "loopz", LOOPE, JUMP },
+ { "lretw", RETF, JUMP16 },
+ { "lsl", LSL, WORD },
+ { "lss", LSS, WORD },
+ { "ltr", LTR, WORD },
+ { "movb", MOV, BYTE },
+ { "movl", MOV, WORD },
+ { "movsb", MOVS, BYTE },
+ { "movsbl", MOVSXB, WORD },
+ { "movsbw", MOVSXB, OWORD },
+ { "movsl", MOVS, WORD },
+ { "movsw", MOVS, OWORD },
+ { "movswl", MOVSX, WORD },
+ { "movw", MOV, OWORD },
+ { "movzbl", MOVZXB, WORD },
+ { "movzbw", MOVZXB, OWORD },
+ { "movzwl", MOVZX, WORD },
+ { "mulb", MUL, BYTE },
+ { "mull", MUL, WORD },
+ { "mulw", MUL, OWORD },
+ { "negb", NEG, BYTE },
+ { "negl", NEG, WORD },
+ { "negw", NEG, OWORD },
+ { "nop", NOP, WORD },
+ { "notb", NOT, BYTE },
+ { "notl", NOT, WORD },
+ { "notw", NOT, OWORD },
+ { "orb", OR, BYTE },
+ { "orl", OR, WORD },
+ { "orw", OR, OWORD },
+ { "outb", OUT, BYTE },
+ { "outl", OUT, WORD },
+ { "outsb", OUTS, BYTE },
+ { "outsl", OUTS, WORD },
+ { "outsw", OUTS, OWORD },
+ { "outw", OUT, OWORD },
+ { "pop", POP, WORD },
+ { "popa", POPA, WORD },
+ { "popal", POPAD, WORD },
+ { "popf", POPF, WORD },
+ { "popfl", POPF, WORD },
+ { "popl", POP, WORD },
+ { "popw", POP, OWORD },
+ { "push", PUSH, WORD },
+ { "pusha", PUSHA, WORD },
+ { "pushal", PUSHAD, WORD },
+ { "pushf", PUSHF, WORD },
+ { "pushl", PUSH, WORD },
+ { "pushw", PUSH, OWORD },
+ { "rclb", RCL, BYTE },
+ { "rcll", RCL, WORD },
+ { "rclw", RCL, OWORD },
+ { "rcrb", RCR, BYTE },
+ { "rcrl", RCR, WORD },
+ { "rcrw", RCR, OWORD },
+ { "ret", RET, JUMP },
+ { "retf", RETF, JUMP },
+ { "rolb", ROL, BYTE },
+ { "roll", ROL, WORD },
+ { "rolw", ROL, OWORD },
+ { "rorb", ROR, BYTE },
+ { "rorl", ROR, WORD },
+ { "rorw", ROR, OWORD },
+ { "sahf", SAHF, WORD },
+ { "salb", SAL, BYTE },
+ { "sall", SAL, WORD },
+ { "salw", SAL, OWORD },
+ { "sarb", SAR, BYTE },
+ { "sarl", SAR, WORD },
+ { "sarw", SAR, OWORD },
+ { "sbbb", SBB, BYTE },
+ { "sbbl", SBB, WORD },
+ { "sbbw", SBB, OWORD },
+ { "scasb", SCAS, BYTE },
+ { "scasl", SCAS, WORD },
+ { "scasw", SCAS, OWORD },
+ { "seta", SETA, BYTE },
+ { "setae", SETAE, BYTE },
+ { "setb", SETB, BYTE },
+ { "setbe", SETBE, BYTE },
+ { "sete", SETE, BYTE },
+ { "setg", SETG, BYTE },
+ { "setge", SETGE, BYTE },
+ { "setl", SETL, BYTE },
+ { "setle", SETLE, BYTE },
+ { "setna", SETBE, BYTE },
+ { "setnae", SETB, BYTE },
+ { "setnb", SETAE, BYTE },
+ { "setnbe", SETA, BYTE },
+ { "setne", SETNE, BYTE },
+ { "setng", SETLE, BYTE },
+ { "setnge", SETL, BYTE },
+ { "setnl", SETGE, BYTE },
+ { "setnle", SETG, BYTE },
+ { "setno", SETNO, BYTE },
+ { "setnp", SETNP, BYTE },
+ { "setns", SETNS, BYTE },
+ { "seto", SETO, BYTE },
+ { "setp", SETP, BYTE },
+ { "sets", SETS, BYTE },
+ { "setz", SETE, BYTE },
+ { "sgdt", SGDT, WORD },
+ { "shlb", SHL, BYTE },
+ { "shldl", SHLD, WORD },
+ { "shll", SHL, WORD },
+ { "shlw", SHL, OWORD },
+ { "shrb", SHR, BYTE },
+ { "shrdl", SHRD, WORD },
+ { "shrl", SHR, WORD },
+ { "shrw", SHR, OWORD },
+ { "sidt", SIDT, WORD },
+ { "sldt", SLDT, WORD },
+ { "smsw", SMSW, WORD },
+ { "stc", STC, WORD },
+ { "std", STD, WORD },
+ { "sti", STI, WORD },
+ { "stosb", STOS, BYTE },
+ { "stosl", STOS, WORD },
+ { "stosw", STOS, OWORD },
+ { "str", STR, WORD },
+ { "subb", SUB, BYTE },
+ { "subl", SUB, WORD },
+ { "subw", SUB, OWORD },
+ { "testb", TEST, BYTE },
+ { "testl", TEST, WORD },
+ { "testw", TEST, OWORD },
+ { "verr", VERR, WORD },
+ { "verw", VERW, WORD },
+ { "wait", WAIT, WORD },
+ { "wbinvd", WBINVD, WORD },
+ { "xadd", XADD, WORD },
+ { "xchgb", XCHG, BYTE },
+ { "xchgl", XCHG, WORD },
+ { "xchgw", XCHG, OWORD },
+ { "xlat", XLAT, WORD },
+ { "xorb", XOR, BYTE },
+ { "xorl", XOR, WORD },
+ { "xorw", XOR, OWORD },
+};
+
+void gnu_parse_init(char *file)
+/* Prepare parsing of a GNU assembly file: start the tokenizer on 'file'.
+ * NOTE(review): the '#' is presumably the comment character the tokenizer
+ * should recognize -- confirm against tok_init().
+ */
+{
+ tok_init(file, '#');
+}
+
+static void zap(void)
+/* Error recovery: throw away tokens until the end of input, a ';' or a
+ * comment is the next token. That token itself is left in place.
+ */
+{
+	for (;;) {
+		token_t *t = get_token(0);
+
+		if (t->type == T_EOF) break;
+		if (t->symbol == ';') break;
+		if (t->type == T_COMMENT) break;
+
+		skip_token(1);
+	}
+}
+
+/* same as in ACK */
+static int zap_unknown(asm86_t *a)
+/* An unknown instruction: zap the rest of the line, but keep its text. The
+ * characters, words and strings up to the end of input, a ';' or a comment
+ * are glued together behind a tab and stored as a malloc()ed string in
+ * a->raw_string. Returns 0 if that worked, -1 if the text does not fit the
+ * line buffer or memory is exhausted; a->raw_string is not touched then.
+ * The tokens are consumed in either case.
+ */
+{
+	token_t *t;
+#define MAX_ASTR 4096
+	char astr[MAX_ASTR];
+	size_t astr_len = 0;
+	int overflow = 0;
+
+	astr[astr_len++] = '\t';
+	while ((t= get_token(0))->type != T_EOF && t->symbol != ';'
+					&& t->type != T_COMMENT) {
+		/* Always keep one byte free for the terminating null. After
+		 * an overflow nothing is copied any more, the line is still
+		 * consumed to the end.
+		 */
+		if (!overflow) switch (t->type) {
+		case T_CHAR:
+			if (astr_len + 1 >= MAX_ASTR) {
+				overflow = 1;
+			} else {
+				astr[astr_len++] = t->symbol;
+			}
+			break;
+		case T_WORD:
+		case T_STRING:
+			if ((size_t) t->len >= MAX_ASTR - astr_len) {
+				overflow = 1;
+			} else {
+				memcpy(astr + astr_len, t->name, t->len);
+				astr_len += t->len;
+			}
+			break;
+		default:
+			/* Other token types leave no text behind. */
+			break;
+		}
+		skip_token(1);
+	}
+	if (overflow)
+		return -1;
+
+	astr[astr_len++] = '\0';
+
+	a->raw_string = malloc(astr_len);
+	if (!a->raw_string)
+		return -1;
+
+	memcpy(a->raw_string, astr, astr_len);
+
+	return 0;
+}
+
+static mnemonic_t *search_mnem(char *name)
+/* Binary search for a mnemonic. (That's why the table is sorted.) If the
+ * name as given is unknown, a second search is made with an 'l' appended.
+ * Returns the table entry, or nil if neither spelling is known.
+ */
+{
+	char name_buf[64];
+	int attempt;
+
+	for (attempt = 0; attempt < 2; attempt++) {
+		int low = 0;
+		int high = arraysize(mnemtab) - 1;
+
+		while (low <= high) {
+			int mid = low + (high - low) / 2;
+			mnemonic_t *m = &mnemtab[mid];
+			int cmp = strcmp(name, m->name);
+
+			if (cmp == 0) return m;
+
+			if (cmp < 0) high = mid - 1; else low = mid + 1;
+		}
+
+		/*
+		 * in gnu the modifier 'l' is usually omitted, however we need
+		 * the information about the arguments length. Therefore we try
+		 * if we know such instruction. It covers most of the cases of
+		 * unknown instructions
+		 */
+		if (attempt == 0) {
+			size_t len = strlen(name);
+
+			/* A name that does not fit the buffer together with
+			 * the 'l' and the null cannot be in the table.
+			 */
+			if (len + 2 > sizeof(name_buf)) break;
+
+			memcpy(name_buf, name, len);
+			name_buf[len] = 'l';
+			name_buf[len + 1] = '\0';
+			name = name_buf;
+		}
+	}
+
+	return nil;
+}
+
+static expression_t *gnu_get_C_expression(int *pn)
+/* Read a "C-like" expression starting at token number *pn, and advance *pn
+ * past it. Precedence is not looked at: a primary (parenthesized
+ * expression, word, string, or unary + - ~ applied to an expression) is
+ * read first, and if a binary operator follows then everything behind it
+ * becomes the right hand side. The expression is printed later like it is
+ * read, so operators the target language lacks must be repaired in the
+ * source file. Returns nil after reporting a syntax error.
+ */
+{
+	expression_t *node, *lhs, *rhs;
+	token_t *tok;
+
+	tok = get_token(*pn);
+	if (tok->symbol == '(') {
+		/* ( expr ): grouping. */
+		(*pn)++;
+		lhs = gnu_get_C_expression(pn);
+		if (lhs == nil) return nil;
+		if (get_token(*pn)->symbol != ')') {
+			parse_err(1, tok, "missing )\n");
+			del_expr(lhs);
+			return nil;
+		}
+		(*pn)++;
+		node = new_expr();
+		node->operator = '[';
+		node->middle = lhs;
+	} else if (tok->type == T_WORD || tok->type == T_STRING) {
+		/* Label, number, or string; the text is copied including
+		 * its terminating null.
+		 */
+		node = new_expr();
+		node->operator = (tok->type == T_WORD) ? 'W' : 'S';
+		node->name = allocate(nil, (tok->len+1) * sizeof(node->name[0]));
+		memcpy(node->name, tok->name, tok->len+1);
+		node->len = tok->len;
+		(*pn)++;
+	} else if (tok->symbol == '+' || tok->symbol == '-'
+						|| tok->symbol == '~') {
+		/* Unary operator. */
+		(*pn)++;
+		lhs = gnu_get_C_expression(pn);
+		if (lhs == nil) return nil;
+		node = new_expr();
+		node->operator = tok->symbol;
+		node->middle = lhs;
+	} else {
+		parse_err(1, tok, "expression syntax error\n");
+		return nil;
+	}
+
+	/* Done, unless a binary operator follows. */
+	tok = get_token(*pn);
+	switch (tok->symbol) {
+	case '%':
+	case '+':
+	case '-':
+	case '*':
+	case '/':
+	case '&':
+	case '|':
+	case '^':
+	case S_LEFTSHIFT:
+	case S_RIGHTSHIFT:
+		break;
+	default:
+		return node;
+	}
+
+	(*pn)++;
+	rhs = gnu_get_C_expression(pn);
+	if (rhs == nil) {
+		del_expr(node);
+		return nil;
+	}
+	lhs = node;
+	node = new_expr();
+	node->operator = tok->symbol;
+	node->left = lhs;
+	node->right = rhs;
+	return node;
+}
+
+static expression_t *gnu_get_operand(asm86_t * a, int *pn, int deref)
+/* Get something like: $immed, memory, offset(%base,%index,scale), or simpler.
+ * Parsing starts at token number *pn, which is advanced past the operand.
+ *
+ * The result is the expression behind a '$', a 'W' node for a %register,
+ * an 'O' node with the offset, base and index as left, middle and right if
+ * a base or index register is present, and otherwise the lone offset,
+ * wrapped in a '(' node when deref is set. A '*' in front of the operand
+ * sets deref. Returns nil after reporting a syntax error. The parameter
+ * 'a' is not used at present.
+ */
+{
+	expression_t *e, *offset, *base, *index;
+	token_t *t;
+
+	if (get_token(*pn)->symbol == '$') {
+		/* An immediate value. */
+		(*pn)++;
+		return gnu_get_C_expression(pn);
+	}
+
+	if (get_token(*pn)->symbol == '*') {
+		/* The rest of the operand is parsed with deref on. */
+		(*pn)++;
+		deref = 1;
+	}
+
+	if ((get_token(*pn)->symbol == '%')
+		&& (t= get_token(*pn + 1))->type == T_WORD
+		&& isregister(t->name)
+	) {
+		/* A register operand. */
+		(*pn)+= 2;
+		e= new_expr();
+		e->operator= 'W';
+		e->name= copystr(t->name);
+		return e;
+	}
+
+	/* Offset? */
+	if (get_token(*pn)->symbol != '('
+				|| get_token(*pn + 1)->symbol != '%') {
+		/* There is an offset. */
+		if ((offset= gnu_get_C_expression(pn)) == nil) return nil;
+	} else {
+		/* No offset. */
+		offset= nil;
+	}
+
+	/* (%base,%index,scale) ? */
+	base= index= nil;
+	if (get_token(*pn)->symbol == '(') {
+		(*pn)++;
+
+		/* %base ? */
+		if (get_token(*pn)->symbol == '%'
+			&& (t= get_token(*pn + 1))->type == T_WORD
+			&& isregister(t->name)
+		) {
+			/* A base register expression. */
+			base= new_expr();
+			base->operator= 'B';
+			base->name= copystr(t->name);
+			(*pn)+= 2;
+		}
+
+		if (get_token(*pn)->symbol == ',') (*pn)++;
+
+		/* %index ? */
+		if (get_token(*pn)->symbol == '%'
+			&& (t= get_token(*pn + 1))->type == T_WORD
+			&& isregister(t->name)
+		) {
+			/* A index register expression. */
+			index= new_expr();
+			index->operator= '1';		/* for now */
+			index->name= copystr(t->name);
+			(*pn)+= 2;
+		}
+
+		if (get_token(*pn)->symbol == ',') (*pn)++;
+
+		/* scale ? */
+		if ((base != nil || index != nil)
+			&& (t= get_token(*pn))->type == T_WORD
+			&& strchr("1248", t->name[0]) != nil
+			&& t->name[1] == 0
+		) {
+			if (index == nil) {
+				/* Base is really an index register. */
+				index= base;
+				base= nil;
+			}
+			index->operator= t->name[0];
+			(*pn)++;
+		}
+
+		if (get_token(*pn)->symbol == ')') {
+			/* Ending paren. */
+			(*pn)++;
+		} else {
+			/* Alas. Report the token found in place of the ')';
+			 * 't' may never have been set on the way here.
+			 */
+			parse_err(1, get_token(*pn), "operand syntax error\n");
+			del_expr(offset);
+			del_expr(base);
+			del_expr(index);
+			return nil;
+		}
+	}
+
+	if (base == nil && index == nil) {
+		if (deref) {
+			/* Return a lone offset as (offset). */
+			e= new_expr();
+			e->operator= '(';
+			e->middle= offset;
+		} else {
+			/* Return a lone offset as is. */
+			e= offset;
+		}
+	} else {
+		e= new_expr();
+		e->operator= 'O';
+		e->left= offset;
+
+		e->middle= base;
+		e->right= index;
+	}
+	return e;
+}
+
+ static expression_t *gnu_get_oplist(asm86_t * a, int *pn, int deref)
+ /* Get a comma (or colon for jmpf and callf) separated list of instruction
+  * operands, starting at token index *pn and advancing *pn past them.
+  *
+  * A segment register followed by ':' is treated as a segment override: it
+  * is stored in a->seg through segreg2seg(), the register operand is dropped
+  * and parsing continues with what follows the colon.
+  *
+  * Otherwise a list is built out of ',' nodes in which 'right' holds the
+  * first operand and 'left' holds the rest of the list.  Returns nil if any
+  * operand fails to parse.
+  */
+ {
+     expression_t *e, *o1, *o2;
+     token_t *t;
+     int sreg;
+     int seg_override= 0;
+
+     if ((e= gnu_get_operand(a, pn, deref)) == nil) return nil;
+
+     t = get_token(*pn);
+
+     /* Only an operand that carries a name can be a register.  Nodes such
+      * as 'O' or '(' are never given a name by this parser, so don't hand
+      * their name field to isregister().
+      * NOTE(review): assumes new_expr() leaves 'name' nil - confirm.
+      */
+     if (t->symbol == ':' && e->name != nil) {
+         sreg = isregister(e->name);
+         if (IS_REGSEG(sreg)) seg_override= 1;
+     }
+
+     if (seg_override) {
+         a->seg = segreg2seg(sreg);
+         del_expr(e);
+         (*pn)++;
+         e = gnu_get_oplist(a, pn, deref);
+     } else
+     if (t->symbol == ',' || t->symbol == ':') {
+         o1= e;
+         (*pn)++;
+         if ((o2= gnu_get_oplist(a, pn, deref)) == nil) {
+             del_expr(o1);
+             return nil;
+         }
+         e= new_expr();
+         e->operator= ',';
+         e->left= o2;
+         e->right= o1;
+     }
+     return e;
+ }
+
+
+ static asm86_t *gnu_get_statement(void)
+ /* Get a pseudo op or machine instruction with arguments.  The current token
+  * (token 0) must be a word.
+  *
+  * Steps: collect rep/repe/repne/repz/repnz prefixes into a->rep, look up the
+  * mnemonic, parse the operand list, keep a trailing comment in
+  * a->raw_string, swap the operands of machine instructions, and do some
+  * per-opcode work (.align normalisation, symbol registration).
+  *
+  * Returns the new statement with its tokens skipped, or nil on an error;
+  * on the error paths visible here no tokens are skipped, so the caller is
+  * expected to dispose of the rest of the line.
+  */
+ {
+     token_t *t= get_token(0);
+     token_t *tn;
+     asm86_t *a;
+     mnemonic_t *m;
+     int n;
+     int prefix_seen;
+     int deref;
+
+     assert(t->type == T_WORD);
+
+     a= new_asm86();
+
+     /* Process instruction prefixes. */
+     for (prefix_seen= 0;; prefix_seen= 1) {
+         /* Tokens read after a prefix need not be words, so check the type
+          * before looking at the name.
+          */
+         if (t->type == T_WORD
+             && (strcmp(t->name, "rep") == 0
+                 || strcmp(t->name, "repe") == 0
+                 || strcmp(t->name, "repne") == 0
+                 || strcmp(t->name, "repz") == 0
+                 || strcmp(t->name, "repnz") == 0)
+         ) {
+             if (a->rep != ONCE) {
+                 parse_err(1, t,
+                     "can't have more than one rep\n");
+             }
+             /* The fourth character tells the variants apart. */
+             switch (t->name[3]) {
+             case 0:     a->rep= REP;    break;
+             case 'e':
+             case 'z':   a->rep= REPE;   break;
+             case 'n':   a->rep= REPNE;  break;
+             }
+         } else
+         if (!prefix_seen) {
+             /* No prefix here, get out! */
+             break;
+         } else {
+             /* No more prefixes, next must be an instruction. */
+             if (t->type != T_WORD
+                 || (m= search_mnem(t->name)) == nil
+                 || m->optype == PSEUDO
+             ) {
+                 parse_err(1, t,
+         "machine instruction expected after instruction prefix\n");
+                 del_asm86(a);
+                 return nil;
+             }
+             break;
+         }
+
+         /* Skip the prefix and extra newlines. */
+         do {
+             skip_token(1);
+         } while ((t= get_token(0))->symbol == ';');
+     }
+
+     /* All the readahead being done upsets the line counter. */
+     a->line= t->line;
+
+     /* Read a machine instruction or pseudo op. */
+     if ((m= search_mnem(t->name)) == nil) {
+         /* we assume that unknown stuff is part of unresolved macro */
+         a->opcode = UNKNOWN;
+         if (zap_unknown(a)) {
+             parse_err(1, t, "unknown instruction '%s'\n", t->name);
+             del_asm86(a);
+             return nil;
+         }
+         return a;
+     }
+     a->opcode= m->opcode;
+     a->optype= m->optype;
+     a->oaz= 0;
+     if (a->optype == OWORD) {
+         a->oaz|= OPZ;
+         a->optype= WORD;
+     }
+     else if (a->optype == JUMP16) {
+         a->oaz|= OPZ;
+         a->optype= JUMP;
+     }
+
+     /* in, out and int never get their lone offsets wrapped in parens. */
+     switch (a->opcode) {
+     case IN:
+     case OUT:
+     case INT:
+         deref= 0;
+         break;
+     default:
+         deref= (a->optype >= BYTE);
+     }
+
+     /* Parse the operands, unless the statement ends right here. */
+     n= 1;
+     if (get_token(1)->type != T_COMMENT && get_token(1)->symbol != ';'
+         && (a->args= gnu_get_oplist(a, &n, deref)) == nil) {
+         del_asm86(a);
+         return nil;
+     }
+
+     tn = get_token(n);
+     if (tn->type == T_COMMENT) {
+         /* Keep the trailing comment with the statement.
+          * NOTE(review): assumes tn->len >= strlen(tn->name) - confirm.
+          */
+         a->raw_string = malloc(tn->len + 1);
+         if (a->raw_string == nil) {
+             /* Out of memory; don't leak the half built statement. */
+             del_asm86(a);
+             return nil;
+         }
+         strcpy(a->raw_string, tn->name);
+     } else
+     if (tn->symbol != ';') {
+         parse_err(1, t, "garbage at end of instruction\n");
+         del_asm86(a);
+         return nil;
+     }
+
+     if (!is_pseudo(a->opcode)) {
+         /* GNU operand order is the other way around. */
+         expression_t *node, *tmp;
+
+         node= a->args;
+         while (node != nil && node->operator == ',') {
+             tmp= node->right;
+             node->right= node->left;
+             node->left= tmp;
+             node= node->left;
+         }
+     }
+
+     switch (a->opcode) {
+     case DOT_ALIGN:
+         /* Delete two argument .align, because ACK can't do it.
+          */
+         if (a->args == nil || a->args->operator != 'W') {
+             del_asm86(a);
+             return nil;
+         }
+         if (isanumber(a->args->name)) {
+             /* Rewrite the number in plain decimal.  The buffer allows
+              * one character per three bits plus the terminating null,
+              * which is enough for any unsigned int in decimal.
+              */
+             unsigned align;
+             char num[sizeof(int) * CHAR_BIT / 3 + 1];
+
+             align= strtoul(a->args->name, nil, 0);
+             sprintf(num, "%u", align);
+             deallocate(a->args->name);
+             a->args->name= copystr(num);
+         }
+         break;
+     case DOT_DEFINE:
+     case DOT_EXTERN:
+         syms_add_global_csl(a->args);
+         break;
+     case DOT_COMM:
+     case DOT_LCOMM:
+         /* Both need a two operand list, a->args->left is used below.
+          * NOTE(review): gnu_get_oplist() puts the first operand in
+          * 'right' and the rest in 'left', and pseudo ops are not
+          * swapped above, so 'left' is the operand after the first
+          * comma - confirm that this is the symbol name meant here.
+          */
+         if (a->args == nil || a->args->left == nil) {
+             parse_err(1, t, "two operands expected\n");
+             del_asm86(a);
+             return nil;
+         }
+         if (a->opcode == DOT_COMM) {
+             syms_add_global(a->args->left->name);
+         } else {
+             syms_add(a->args->left->name);
+         }
+         break;
+     case JMPF:
+     case CALLF:
+         /*FALL THROUGH*/
+     case JMP:
+     case CALL:
+         /* Nothing special to do for jumps and calls. */
+         break;
+     default:;
+     }
+     skip_token(n+1);
+     return a;
+ }
+
+
+ asm86_t *gnu_get_instruction(void)
+ /* Return the next item of the GNU syntax input as an asm86_t: a comment or
+  * C preprocessor line (text kept in raw_string), a label definition, a
+  * "name = expression" assignment, or a statement parsed by
+  * gnu_get_statement().  "# line file" markers update the file position via
+  * set_file() and are otherwise dropped.  Lines that fail to parse are
+  * zapped and the next line is tried.
+  *
+  * Returns nil at end of input, and also when memory for a comment string
+  * can't be obtained.
+  */
+ {
+     asm86_t *a;
+     expression_t *e;
+     token_t *t;
+
+     /* Loop instead of calling ourselves for every skipped line, so a long
+      * run of '#' or bad lines doesn't deepen the stack.
+      */
+     for (;;) {
+         while ((t= get_token(0))->symbol == ';' || t->symbol == '/') {
+             zap();      /* if a comment started by a '/' */
+             skip_token(1);
+         }
+
+         if (t->type == T_EOF) return nil;
+
+         if (t->type == T_COMMENT || t->type == T_C_PREPROCESSOR) {
+             /* Pass the text along untouched. */
+             a = new_asm86();
+             if (t->type == T_COMMENT)
+                 a->opcode = COMMENT;
+             else
+                 a->opcode = C_PREPROCESSOR;
+
+             /* NOTE(review): assumes t->len >= strlen(t->name). */
+             a->raw_string = malloc(t->len + 1);
+             if (a->raw_string == nil) {
+                 del_asm86(a);
+                 return nil;
+             }
+             strcpy(a->raw_string, t->name);
+             skip_token(1);
+             return a;
+         }
+
+         if (t->symbol == '#') {
+             /* Preprocessor line and file change. */
+             if ((t= get_token(1))->type != T_WORD
+                 || !isanumber(t->name)
+                 || get_token(2)->type != T_STRING
+             ) {
+                 parse_err(1, t, "file not preprocessed?\n");
+             } else {
+                 set_file(get_token(2)->name,
+                     strtol(get_token(1)->name, nil, 0) - 1);
+             }
+             /* GNU CPP adds extra cruft, simply zap the line. */
+             zap();
+             continue;
+         }
+
+         if (t->type == T_WORD && get_token(1)->symbol == ':') {
+             /* A label definition. */
+             a= new_asm86();
+             a->line= t->line;
+             a->opcode= DOT_LABEL;
+             a->optype= PSEUDO;
+             a->args= e= new_expr();
+             e->operator= ':';
+             e->name= copystr(t->name);
+             syms_add(t->name);
+             skip_token(2);
+             return a;
+         }
+
+         if (t->type == T_WORD && get_token(1)->symbol == '=') {
+             /* An assignment: name = expression. */
+             int n= 2;
+             token_t *tail;
+
+             if ((e= gnu_get_C_expression(&n)) == nil) {
+                 zap();
+                 continue;
+             }
+
+             tail= get_token(n);
+             if (tail->type != T_COMMENT && tail->symbol != ';') {
+                 parse_err(1, t, "garbage after assignment\n");
+                 del_expr(e);    /* not going to be used */
+                 zap();
+                 continue;
+             }
+
+             a= new_asm86();
+             if (tail->type == T_COMMENT) {
+                 /* Keep the trailing comment with the assignment. */
+                 a->raw_string = malloc(tail->len + 1);
+                 if (a->raw_string == nil) {
+                     del_expr(e);
+                     del_asm86(a);
+                     return nil;
+                 }
+                 strcpy(a->raw_string, tail->name);
+             }
+             a->line= t->line;
+             a->opcode= DOT_EQU;
+             a->optype= PSEUDO;
+             a->args= new_expr();
+             a->args->operator= '=';
+             a->args->name= copystr(t->name);
+             syms_add(t->name);
+             a->args->middle= e;
+             skip_token(n+1);
+             return a;
+         }
+
+         if (t->type == T_WORD) {
+             /* A pseudo op or machine instruction. */
+             if ((a= gnu_get_statement()) != nil) return a;
+             zap();
+             continue;
+         }
+
+         parse_err(1, t, "syntax error\n");
+         zap();
+     }
+ }