/* 
   Rasm: a Real mode ASseMbler
   Copyright (C) 1992 Electronetics, Inc.  All rights reserved.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

/*
 * Simple two-pass real-mode assembler, which writes
 * directly to .com format, eliminating
 * the need for a linker.
 *
 * Input file format is free form.
 *
 */
#include "rasm.h"

/*
 * Input/output files and operand scratch space.  main() first points
 * in/out at stdin/stdout and then replaces both with the files it opens.
 */
char *inname;            /* input file name: the last non-option argument */
FILE *in;
char *outname = "a.out"; /* output file name; replaced by the -o option */
FILE *out;
struct operand *dest, *src; /* main() points these at destination/source */
struct operand destination, source;

/*
 * Assembler and lexer state shared by the whole file.
 */
int pass;  /* 1 or 2 */
#define SECOND_PASS (pass == 2)
#define FIRST_PASS (pass == 1)
unsigned long lineno;  /* current line number */
unsigned long prevline; /* line number of previous token */
unsigned long nextline; /* line number of next token */
int list_lines;  /* true if listing lines */
char tokchar;    /* first character of a token */
#define LBSIZE 1024
char lex_buffer[LBSIZE]; /* raw lex buffer */
static char *lptr;    /* pointer to current spot in buffer */
static char *limit;   /* end of string in lex buffer */
char id_buffer[LBSIZE]; /* text of the last identifier or string token */
int id_len;             /* number of characters identifier() stored */
unsigned short pc; /* position counter */

long intval; /* integer value */

/*
 * Range tests on an integer value.
 *
 * SRANGE(x): the bits above the low 16 are all clear or all set, i.e.
 *            x fits in a 16-bit word (unsigned or sign-extended).
 * BRANGE(x): the bits above the low 8 are all clear or all set, i.e.
 *            x fits in a byte (unsigned or sign-extended).
 *
 * Both forms of each test must use the same mask; BRANGE used to mask
 * with 0xffff0000 and compare against 0xffffff00, which can never match,
 * so negative byte values were always rejected.
 */
#define SRANGE(x) (((x) & 0xffff0000) == 0 || \
   ((x) & 0xffff0000) == 0xffff0000)
#define BRANGE(x) (((x) & 0xffffff00) == 0 || \
   ((x) & 0xffffff00) == 0xffffff00)

char *xmalloc(); /* allocator that exits on failure; defined later in this file */
/*
 * Token defs (must be above 255)
 *
 * Single-character tokens are returned as the character itself, so the
 * named token codes start at 256.
 */
#define IDENTIFIER 256
#define LABEL_DEC  257
#define STRING     258
#define INTEGER    259

/* Printable token names; expect() indexes this with (token - 256). */
char *tok_names[] = {"Identifier", "Label", "String", "Integer"};

/*
 * Hashing
 */
#define HSIZE 512 /* number of buckets in a symbol hash table */

/*
 * One symbol table entry.  Register names are entered by main() with
 * negative values; labels are entered with their pc value.
 */
struct hentry {
  char *name;
  long value;
  struct hentry *next; /* link in symtable hash */
  struct hentry *chain;/* link in segment list */
} *htmp, *symtable[HSIZE]; /* htmp: scratch pointer used by lookup()/add_entry() */
struct hentry *lookup();
struct hentry *seglist; /* head of the list threaded through hentry.chain */

/*
 * opcode routines
 */
/* Type of an opcode handler; do_statements() calls it with the table's
   "argument" value as its only argument. */
typedef int (*gptr)();
/* Forward declarations of the handlers referenced by the opcode table. */
int do_string(), do_org(), do_entry(), do_mov();
int do_skip(), outbyte(), outshort();
int do_idata(), do_pushpop(), optype1(), do_ctis();
int do_int(), do_jmpcall(), do_ret(), do_incdec();
int do_inout(), do_seg(), do_test(), optype2(), do_ea();
int do_xchg(), do_rotshft();

/*
 * Opcode table: mnemonic, handler, and the single argument passed to the
 * handler.  The entries MUST stay sorted by name in strcmp() order;
 * main() verifies this at startup and refuses to run otherwise.
 *
 * "cmpw" was missing although every other optype1 mnemonic has both a
 * "b" and a "w" form; it is added here in its sorted position.
 */
struct opcode {
  char *name;
  gptr routine;
  int  argument;
} opcodes[] = {
  {".align", do_skip, 1},
  {".ascii", do_string, 0},
  {".asciz", do_string, 1},
  {".byte",  do_idata, 1},
  {".entry", do_entry, 0},
  {".long",  do_idata, 4},
  {".org", do_org, 0},
  {".skip",  do_skip, 0},
  {".word",  do_idata, 2},
  {"aaa", outbyte, 0x37},
  {"aad", outshort, 0x0ad5},
  {"aam", outshort, 0x0ad4},
  {"aas", outbyte, 0x3f},
  {"adc", optype1, 0x5},
  {"adcb", optype1, 0x5 | (BYTE << 4)},
  {"adcw", optype1, 0x5 | (WORD << 4)},
  {"add", optype1, 0x1},
  {"addb", optype1, 0x1 | (BYTE << 4)},
  {"addw", optype1, 0x1 | (WORD << 4)},
  {"and", optype1, 0x8},
  {"andb", optype1, 0x8 | (BYTE << 4)},
  {"andw", optype1, 0x8 | (WORD << 4)},
  {"call", do_jmpcall, 0},
  {"callf", do_jmpcall, 0x100},
  {"cbw", outbyte, 0x98},
  {"clc", outbyte, 0xf8},
  {"cld", outbyte, 0xfc},
  {"cli", outbyte, 0xfa},
  {"cmc", outbyte, 0xf5},
  {"cmp", optype1, 0xf},
  {"cmpb", optype1, 0xf | (BYTE << 4)},
  {"cmpsb", outbyte, 0xa6},
  {"cmpsw", outbyte, 0xa7},
  {"cmpw", optype1, 0xf | (WORD << 4)},
  {"cwd", outbyte, 0x99},
  {"daa", outbyte, 0x27},
  {"das", outbyte, 0x2f},
  {"dec", do_incdec, 1},
  {"div", optype2, 0x6},
  {"divb", optype2, 0x6 | (BYTE << 4)},
  {"divw", optype2, 0x6 | (WORD << 4)},
  {"hlt", outbyte, 0xf4},
  {"idiv", optype2, 0x7},
  {"idivb", optype2, 0x7 | (BYTE << 4)},
  {"idivw", optype2, 0x7 | (WORD << 4)},
  {"imul", optype2, 0x5},
  {"imulb", optype2, 0x5 | (BYTE << 4)},
  {"imulw", optype2, 0x5 | (WORD << 4)},
  {"in", do_inout, 1},
  {"inc", do_incdec, 0},
  {"int", do_int, 0},
  {"into", outbyte, 0xce},
  {"iret", outbyte, 0xcf},
  {"ja", do_ctis, 0x77},
  {"jae", do_ctis, 0x73},
  {"jb", do_ctis, 0x72},
  {"jbe", do_ctis, 0x76},
  {"jc", do_ctis, 0x72},
  {"jcxz", do_ctis, 0xe3},
  {"je", do_ctis, 0x74},
  {"jg", do_ctis, 0x7f},
  {"jge", do_ctis, 0x7d},
  {"jl", do_ctis, 0x7c},
  {"jle", do_ctis, 0x7e},
  {"jmp", do_jmpcall, 1},
  {"jmpf", do_jmpcall, 0x101},
  {"jmps", do_jmpcall,0x11},
  {"jnc", do_ctis, 0x73},
  {"jne", do_ctis, 0x75},
  {"jno", do_ctis, 0x71},
  {"jnp", do_ctis, 0x7b},
  {"jns", do_ctis, 0x79},
  {"jnz", do_ctis, 0x75},
  {"jo", do_ctis, 0x70},
  {"jp", do_ctis, 0x7a},
  {"js", do_ctis, 0x78},
  {"jz", do_ctis, 0x74},
  {"lahf", outbyte, 0x9f},
  {"lds", do_ea, 0xc5},
  {"lea", do_ea, 0x8d},
  {"les", do_ea, 0xc4},
  {"lock", outbyte, 0xf0},
  {"lodsb", outbyte, 0xac},
  {"lodsw", outbyte, 0xad},
  {"loop", do_ctis, 0xe2},
  {"loope", do_ctis, 0xe1},
  {"loopne", do_ctis, 0xe0},
  {"loopnz", do_ctis, 0xe0},
  {"loopz", do_ctis, 0xe1},
  {"mov", do_mov, NONE},
  {"movb", do_mov, BYTE},
  {"movsb", outbyte, 0xa4},
  {"movsw", outbyte, 0xa5},
  {"movw", do_mov, WORD},
  {"mul", optype2, 0x4},
  {"mulb", optype2, 0x4 | (BYTE << 4)},
  {"mulw", optype2, 0x4 | (WORD << 4)},
  {"neg", optype2, 0x3},
  {"negb", optype2, 0x3 | (BYTE << 4)},
  {"negw", optype2, 0x3 | (WORD << 4)},
  {"nop", outbyte, 0x90},
  {"not", optype2, 0x2},
  {"notb", optype2, 0x2 | (BYTE << 4)},
  {"notw", optype2, 0x2 | (WORD << 4)},
  {"or", optype1, 0x2},
  {"orb", optype1, 0x2 | (BYTE << 4)},
  {"orw", optype1, 0x2 | (WORD << 4)},
  {"out", do_inout, 0},
  {"pop", do_pushpop, 1},
  {"popf", outbyte, 0x9d},
  {"push", do_pushpop, 0},
  {"pushf", outbyte, 0x9c},
  {"rcl", do_rotshft, 0x2},
  {"rclb", do_rotshft, 0x2 | (BYTE << 4)},
  {"rclw", do_rotshft, 0x2 | (WORD << 4)},
  {"rcr", do_rotshft, 0x3},
  {"rcrb", do_rotshft, 0x3 | (BYTE << 4)},
  {"rcrw", do_rotshft, 0x3 | (WORD << 4)},
  {"rep", outbyte, 0xf3},
  {"repe", outbyte, 0xf3},
  {"repne", outbyte, 0xf2},
  {"repnz", outbyte, 0xf2},
  {"repz", outbyte, 0xf3},
  {"ret", do_ret, 0},
  {"retf", do_ret, 1},
  {"rol", do_rotshft, 0x0},
  {"rolb", do_rotshft, 0x0 | (BYTE << 4)},
  {"rolw", do_rotshft, 0x0 | (WORD << 4)},
  {"ror", do_rotshft, 0x1},
  {"rorb", do_rotshft, 0x1 | (BYTE << 4)},
  {"rorw", do_rotshft, 0x1 | (WORD << 4)},
  {"sahf", outbyte, 0x9e},
  {"sal", do_rotshft, 0x4},
  {"salb", do_rotshft, 0x4 | (BYTE << 4)},
  {"salw", do_rotshft, 0x4 | (WORD << 4)},
  {"sar", do_rotshft, 0x7},
  {"sarb", do_rotshft, 0x7 | (BYTE << 4)},
  {"sarw", do_rotshft, 0x7 | (WORD << 4)},
  {"sbb", optype1, 0x7},
  {"sbbb", optype1, 0x7 | (BYTE << 4)},
  {"sbbw", optype1, 0x7 | (WORD << 4)},
  {"scasb", outbyte, 0xae},
  {"scasw", outbyte, 0xaf},
  {"seg", do_seg, 0},
  {"shl", do_rotshft, 0x4},
  {"shlb", do_rotshft, 0x4 | (BYTE << 4)},
  {"shlw", do_rotshft, 0x4 | (WORD << 4)},
  {"shr", do_rotshft, 0x5},
  {"shrb", do_rotshft, 0x5 | (BYTE << 4)},
  {"shrw", do_rotshft, 0x5 | (WORD << 4)},
  {"stc", outbyte, 0xf9},
  {"std", outbyte, 0xfd},
  {"sti", outbyte, 0xfb},
  {"stosb", outbyte, 0xaa},
  {"stosw", outbyte, 0xab},
  {"sub", optype1, 0xb},
  {"subb", optype1, 0xb | (BYTE << 4)},
  {"subw", optype1, 0xb | (WORD << 4)},
  {"test", do_test, NONE},
  {"testb", do_test, BYTE},
  {"testw", do_test, WORD},
  {"wait", outbyte, 0x9b},
  {"xchg", do_xchg, 0},
  {"xlat", outbyte, 0xd7},
  {"xor", optype1, 0xc},
  {"xorb", optype1, 0xc | (BYTE << 4)},
  {"xorw", optype1, 0xc | (WORD << 4)}
};
struct opcode *otmp; /* scratch cursor into opcodes[]; also set by callers */

/*
 * Find the opcode table entry whose name equals `name`.
 * Walks the table front to back using the global otmp as the cursor.
 * Returns the matching entry, or NULL when the mnemonic is unknown.
 */
struct opcode *oplookup(name)
char *name; {
  struct opcode *end;
  end = opcodes + sizeof(opcodes)/sizeof(struct opcode);
  otmp = &opcodes[0];
  while (otmp != end) {
    if (strcmp(name, otmp->name) == 0) {
      return otmp;
    }
    otmp++;
  }
  return NULL;
}

unsigned char opsize; /* WORD, BYTE, or NONE */

/*
 * Register names.  main() enters regs[i] into the symbol table with the
 * value -(i+1), so the order of this array fixes each register's code.
 */
char *regs[] = {
 "ax", "cx", "dx", "bx", "sp", "bp", "si", "di",
 "al", "cl", "dl", "bl", "ah", "ch", "dh", "bh",
 "es", "cs", "ss", "ds"
};

/*
 * Program entry point.
 *
 * Checks that the opcode table is sorted, enters the register names
 * into the symbol table, parses the command line (-l list lines,
 * -o NAME output file, -n suppress the banner; any other argument is
 * the input file), then runs the statement parser twice over the input:
 * pass 1 collects labels, pass 2 emits code.
 *
 * The output file is opened in binary mode: the assembler writes raw
 * machine code, and a text-mode stream on MSDOS would expand every 0x0a
 * byte into 0x0d 0x0a.
 */
main(argc, argv)
int argc;
char **argv; {
  int i;
  in = stdin;
  out = stdout;

  dest = &destination;
  src = &source;
  /* walk the table backwards comparing each name with its predecessor */
  i = sizeof(opcodes)/sizeof(struct opcode) - 1;
  for (otmp = &opcodes[i]; i > 0; otmp--,i--) {
    if (strcmp(otmp->name,(otmp-1)->name) <= 0) {
      printf("opcode table out of order!\n");
      printf("%s < %s\n",otmp->name,(otmp-1)->name);
      exit(1);
    }
  }
  /* registers get negative symbol values: regs[i] -> -(i+1) */
  for (i = 0; i < sizeof(regs)/sizeof(char *); i++) {
    add_entry(regs[i], -(long)(i+1), symtable);
  }
  for (i = 1; i < argc; i++) {
    if (argv[i][0] == '-') {
      switch(argv[i][1]) {
      case 'l': list_lines = 1; break;
      case 'o': if (i < argc - 1) outname = argv[++i]; break;
      case 'n': nocopyright = 1; break;
      default:
        printf("unknown option %s ignored\n",argv[i]);
      break;
      }
    }
    else {
      inname = argv[i];
    }
  }
  if (!nocopyright) {
    printf("Rasm 1.0 (%s) Copyright 1992 Electronetics, Inc.\n",mach);
  }
  if (!inname || !outname) {
    usage(argv[0]);
  }
  if (!(in = fopen(inname,"r"))) {
    perror(inname);
    exit(1);
  }
  if (!(out = fopen(outname,"wb"))) {
    perror(outname);
    exit(1);
  }

  pass = 1;
  do_statements();

  pass = 2;
  rewind(in);
  lineno = 0;

  do_statements();
  fclose(in);
  /* fclose flushes buffered output; a failure means the file is incomplete */
  if (fclose(out) != 0) {
    perror(outname);
    exit(1);
  }
  return 0;
}

/*
 * Print the command-line synopsis and exit with status 1.
 * name: the program name to show (main passes argv[0]).
 *
 * The synopsis matches what main() actually parses: the output file is
 * given with -o, and a bare argument is the input file.
 */
usage(name)
char *name; {
  printf("usage: %s [-l] [-n] [-o output-name] input-name\n",name);
  exit(1);
}

/*
 * Return a freshly allocated copy of the string `name`.
 * Storage comes from xmalloc(), which exits on allocation failure.
 */
char *strsave(name)
char *name; {
  char *copy;
  copy = xmalloc(strlen(name) + 1); /* +1 for the terminating NUL */
  strcpy(copy, name);
  return copy;
}

/*
 * Statement parser: one complete pass over the input.
 * Resets pc to 0, then reads tokens until EOF.  A label declaration is
 * entered into the symbol table with the current pc (first pass only);
 * an identifier is looked up in the opcode table and its handler is
 * called with the table's argument.  Anything else is a bad token.
 */
do_statements() {
  int tok;
  pc = 0;
  for (;;) {
    tok = gettoken();
    if (tok == EOF) {
      break;
    }
    if (tok == LABEL_DEC) {
      if (FIRST_PASS) {
        add_entry(strsave(id_buffer), (long)pc, symtable);
      }
    }
    else if (tok == IDENTIFIER) {
      otmp = oplookup(id_buffer);
      if (!otmp) {
        printf("Error: line %ld: unknown opcode %s\n",lineno,id_buffer);
        sexit(1);
      }
      (*(otmp->routine))(otmp->argument);
    }
    else {
      bad_token(NULL);
    }
  }
}

/*
 * Generate initialized data (.byte, .word, .long).
 * size: item size in bytes (1, 2 or 4).
 *
 * Parses a comma-separated list of items.  Each item is an integer,
 * optionally preceded by '-', or (for .word only) an identifier whose
 * value is taken from the symbol table on the second pass.  The token
 * that ends the list is pushed back for the caller.
 *
 * The sign flag is cleared at the start of every item; it used to be
 * set once and never reset, so in ".word -1, 2" the 2 was negated too.
 */
do_idata(size)
int size; {
  int minus, ct;
  do {
    minus = 0; /* a leading '-' applies to the current item only */
    switch(gettoken()) {
    case '-':
      expect(INTEGER);
      minus = 1;
      /* fall through */
    case INTEGER:
      intval = minus ? -intval : intval;
      break;
    case IDENTIFIER:
      if (size == 2) {
        if (SECOND_PASS) {
          /* only non-negative values are labels; negatives are registers */
          if ((htmp = lookup(id_buffer, symtable))
            && (intval = htmp->value) >= 0) {
            break;
          }
          else {
            printf("Error: line %ld: undefined: %s\n",lineno,id_buffer);
            sexit(1);
          }
        }
        else {
          /* first pass: the value is irrelevant, only the size counts */
          break;
        }
      }
      /* fall through */
    default:
      bad_token(NULL);
      break;
    }
    switch(size) {
    case 1: outbyte(intval); break;
    case 2: outshort(intval); break;
    case 4: outshort(intval); outshort(intval >> 16); break;
    default:
      printf("Internal error: bad size to idata (%d)\n",size);
      sexit(1);
    }
  }
  while ((ct = gettoken()) == ',');
  ungettoken(ct);
}

expect(tok)
int tok; {
  int i;
  if ((i = gettoken()) != tok) {
    if (tok < 256) {
      printf("Error: line %ld: expected \"%c\"\n", lineno, tok);
    }
    else if (tok - 256 < sizeof(tok_names)/sizeof(char *)){
      printf("Error: line %ld: expected %s\n", lineno, tok_names[tok-256]);
    }
    else {
      bad_token(NULL);
    }
    sexit(1);
  }
}
/*
 * Lexing routines
 */

/*
 * Fetch the next line of input
 */
next_line() 
{
  int rval;
  do {
    if (fgets(lex_buffer, LBSIZE, in) == NULL)
      return EOF;
    if (list_lines && pass == 1) {
      printf("%s",lex_buffer);
    }
    lineno++;
    rval = strlen(lex_buffer);
  }
  while (rval <= 1);
  limit = lex_buffer + rval;
  lptr = lex_buffer;
  return rval;
}

unsigned long startcom;   /* line on which the current C-style comment began */
int tokstack[3];          /* pushed-back tokens (see ungettoken) */
unsigned long linestk[2]; /* line numbers recorded per token, indexed by lstate */
int lstate;               /* index into linestk; toggled between 0 and 1 */
int tokcount = 0;         /* number of tokens currently on tokstack */

/*
 * Push `tok` back so that the next gettoken() returns it.
 * Exits once the push-back stack holds three tokens.  Also rewinds
 * lineno to the value saved in linestk and flips lstate.
 */
ungettoken(tok)
int tok; {
  tokstack[tokcount] = tok;
  tokcount = tokcount + 1;
  if (tokcount > 2) {
    printf("token push back stack overflow!\n");
    sexit(1);
  }
  /* reset to previous linenumber */
  lineno = linestk[lstate];
  lstate = lstate ^ 1;
}

/*
 * Return the next token from the input.
 *
 * Pushed-back tokens are returned first.  Otherwise white space,
 * ';' comments and C-style comments are skipped and one token is
 * scanned:
 *   identifier / label  -> IDENTIFIER or LABEL_DEC (text in id_buffer)
 *   "..."               -> STRING (text in id_buffer)
 *   digits, 'c'         -> INTEGER (value in intval)
 *   [ ] , - + :         -> the character itself
 * Returns EOF at end of input.  Unknown characters are fatal.
 *
 * Fixes over the previous version:
 *  - the comment scanner only consumes the character after '*' when it
 *    is '/', so a comment ending in "**" followed by '/' is terminated;
 *  - a character literal now goes through the same line-number
 *    bookkeeping as every other scanned token, keeping ungettoken()
 *    consistent.
 */
gettoken() {
  int rval;
  if (tokcount) {
    lineno = linestk[lstate];  /* restore line number */
    lstate ^= 1;
    return tokstack[--tokcount];
  }
restart:
  if (lptr >= limit && next_line() == EOF) {
    return EOF;
  }
  switch(tokchar = *lptr++) {
  case ' ': case '\t': case '\f': case '\n':
    goto restart; /* Skip leading WS */
  case ';': /* comment to end of line */
    lptr = limit;
    goto restart;
  case '/':
    if (*lptr == '*') { /* C-style comment */
      lptr++;
      startcom = lineno;
comstart:
      if (lptr >= limit && next_line() == EOF) {
        printf("Error: line %ld: unterminated comment\n", startcom);
        sexit(1);
      }
      /* peek at the character after '*'; consume it only if it closes
         the comment, so that a run of '*' is rescanned correctly */
      if (*lptr++ == '*' && *lptr == '/') {
        lptr++;
        goto restart;
      }
      goto comstart;
    }
    goto unknown;
  case '_': case '$': case '.':
  case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
  case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
  case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
  case 's': case 't': case 'u': case 'v': case 'w': case 'x':
  case 'y': case 'z':
  case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
  case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
  case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
  case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
  case 'Y': case 'Z':
    rval = identifier(0);
  break;
  case '[': case ']': case ',': case '-': case '+': case ':':
    rval = tokchar;
  break;
  case '"':
    rval = get_string();
  break;
  case '0': case '1': case '2': case '3': case '4':
  case '5': case '6': case '7': case '8': case '9':
    lptr--; /* get_integer rescans the first digit */
    rval = get_integer(0);
  break;
  case '\'':
    intval = *lptr++;
    if (*lptr++ != '\'') {
      /* reported but, as before, not fatal */
      printf("Error: line %ld: bad character\n",lineno);
    }
    rval = INTEGER;
  break;
  default:
unknown:
    printf("Error: line %ld: unknown character %d\n",lineno,tokchar);
    sexit(1);
  break;
  }
  /* remember the line of this token for ungettoken() */
  linestk[lstate] = lineno;
  lstate ^= 1;
  return rval;
}

/*
 * Scan the rest of an identifier into id_buffer.
 * skip: when zero, the already-consumed first character (tokchar) is
 *       stored first.
 * Letters, digits, '_', '$' and '.' are accepted.  A ':' directly after
 * the name is consumed and marks a label declaration; any other
 * character is left unread.
 * Returns IDENTIFIER or LABEL_DEC; id_buffer is NUL-terminated and
 * id_len holds its length.
 */
identifier(skip)
int skip; {
  int c;
  int kind = IDENTIFIER;
  id_len = 0;
  if (!skip) {
    id_buffer[id_len++] = tokchar;
  }
  for (;;) {
    c = *lptr++;
    switch (c) {
    case '_': case '$': case '.':
    case '0': case '1': case '2': case '3': case '4':
    case '5': case '6': case '7': case '8': case '9':
    case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g':
    case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':
    case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u':
    case 'v': case 'w': case 'x': case 'y': case 'z':
    case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G':
    case 'H': case 'I': case 'J': case 'K': case 'L': case 'M': case 'N':
    case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U':
    case 'V': case 'W': case 'X': case 'Y': case 'Z':
      id_buffer[id_len++] = c;
      continue; /* keep scanning */
    case ':':
      kind = LABEL_DEC; /* the colon stays consumed */
      break;
    default:
      lptr--; /* not part of the name: give it back */
      break;
    }
    break; /* reached only from the two terminating cases */
  }
  id_buffer[id_len] = 0;
  return kind;
}

#define isoctal(x) ((x) <= '7' && (x) >= '0')
/*
 * Scan a double-quoted string (the opening quote is already consumed)
 * into id_buffer, NUL-terminated.  Strings may continue across input
 * lines.  Returns STRING; an unterminated or over-long string is fatal.
 *
 * Escapes: \b \f \n \r \t, and \ddd with exactly three octal digits.
 * Any other escaped character stands for itself.
 *
 * Fixes over the previous version:
 *  - \ddd is computed from the digit values; it used to combine the
 *    character codes, so "\101" did not yield 0x41;
 *  - the buffer limit is checked before every store, not only for
 *    unescaped characters;
 *  - \t is recognized alongside the other control escapes.
 */
get_string() {
  int len = 0, c;
  unsigned long startstr;
  startstr = lineno;
  while (1) {
    if (lptr >= limit && next_line() == EOF) {
      printf("Error: line %ld: unterminated string\n", startstr);
      sexit(1);
    }
    c = *lptr++;
    if (c == '"') {
      id_buffer[len] = '\0';
      return STRING;
    }
    /* every path below stores exactly one byte */
    if (len >= sizeof(id_buffer) - 2) {
      printf("Error: line %ld: string too large\n", startstr);
      sexit(1);
    }
    if (c != '\\') {
      id_buffer[len++] = c;
      continue;
    }
    switch (c = *lptr++) {
    case 'b': id_buffer[len++] = 0x8; break;
    case 'f': id_buffer[len++] = 0xc; break;
    case 'n': id_buffer[len++] = 0xa; break;
    case 'r': id_buffer[len++] = 0xd; break;
    case 't': id_buffer[len++] = 0x9; break;
    case '0': case '1': case '2': case '3':
    case '4': case '5': case '6': case '7':
      if (isoctal(*lptr) && isoctal(*(lptr+1))) {
        id_buffer[len++] = 64*(c - '0') +
          8*(*lptr - '0') + (*(lptr+1) - '0');
        lptr += 2;
        break;
      }
      /* fall through: fewer than three digits, keep the character */
    default:
      id_buffer[len++] = c;
    break;
    }
  }
}

/*
 * Scan an integer literal starting at lptr and store it in intval.
 * A leading "0x"/"0X" selects base 16, any other leading '0' base 8,
 * otherwise base 10.  Scanning stops at the first character that is
 * not a digit of the selected base; that character is left unread.
 * minusp: when non-zero the value is negated.
 * Returns INTEGER.
 */
get_integer(minusp)
int minusp; {
  long value = 0L;
  int radix = 10, ch, more = 1;

  ch = *lptr++;
  if (ch == '0') {
    if (*lptr == 'x' || *lptr == 'X') {
      lptr++;
      radix = 16;
    }
    else {
      radix = 8;
    }
  }
  else if (ch >= '1' && ch <= '9') {
    value = ch - '0';
  }
  else {
    printf("Error: line %ld: bad integer\n",lineno);
    sexit(1);
  }

  while (more) {
    ch = *lptr++;
    if (ch >= '0' && ch <= '7') {
      value = value * (long)radix + (ch - '0');
    }
    else if (ch == '8' || ch == '9') {
      if (radix < 10) {
        more = 0;
      }
      else {
        value = value * (long)radix + (ch - '0');
      }
    }
    else if ((ch >= 'a' && ch <= 'f') || (ch >= 'A' && ch <= 'F')) {
      if (ch >= 'a' && ch <= 'f') {
        ch = 'A' + (ch - 'a'); /* fold to upper case */
      }
      if (radix < 16) {
        more = 0;
      }
      else {
        value = (value << 4) + (ch - ('A' - 10));
      }
    }
    else {
      more = 0;
    }
  }
  lptr--; /* un-read the character that ended the number */

  intval = minusp ? -value : value;
  return INTEGER;
}

do_skip(alignp)
int alignp; {
  char c = 0;
  int ival;
  if (gettoken() != INTEGER || intval < 0) {
    bad_token(NULL);
  }
  if (alignp) {
    if (intval > 8 || intval < 0) {
      bad_token(NULL);
    }
    switch(ival = intval) {
    case 1: case 2: case 4: case 8:
    break;
    default:
      bad_token(NULL);
    break;
    }
    if (ival > 1 && (pc % ival)) {
      intval = (ival - (pc % ival));
    }
    else intval = 0;
  }

  advance(intval);
  if (SECOND_PASS) {
    while (intval-- > 0) {
      fwrite((char *)&c, 1, 1, out);
    }
  }
}

/*
 * .ascii / .asciz: emit a comma-separated list of string literals.
 * zerop: when non-zero each string is followed by its NUL terminator.
 * The token that ends the list is pushed back for the caller.
 */
do_string(zerop)
int zerop; {
  int len, ct;
  do {
    if (gettoken() != STRING) {
      bad_token(NULL);
    }
    len = strlen(id_buffer) + (zerop ? 1 : 0);
    advance(len);
    if (SECOND_PASS) {
      fwrite(id_buffer, len, 1, out);
    }
    ct = gettoken();
  }
  while (ct == ',');
  ungettoken(ct);
}

bad_token(mess)
char *mess; {
  if (!list_lines)
    printf("%s",lex_buffer); /* has newline at end already */
  printf("--- Error: line %ld: %s\n",lineno,mess?mess:"bad token");
  sexit(1);
}

/*
 * Symbol (label) management
 */

/*
 * Hash a name to a bucket index in the range 0 .. HSIZE-1 by summing
 * its characters.  Characters are summed as unsigned values: with a
 * signed plain char, a name containing bytes above 0x7f could make the
 * sum, and therefore the returned index, negative.
 */
hash(name)
char *name; {
  unsigned int hval = 0;
  while (*name) {
    hval += (unsigned char)*name++;
  }
  return (int)(hval % HSIZE);
}

struct hentry dot; /* pseudo-entry returned for the name "." */

/*
 * Look `name` up in the hash table `table`.
 * Returns the entry if present.  Otherwise the name "." yields the
 * pseudo-entry `dot` with its value set to the current pc.
 * Returns NULL when the name is unknown.  Uses the global htmp as the
 * chain cursor.
 */
struct hentry *lookup(name, table)
char *name; 
struct hentry *table[]; {
  htmp = table[hash(name)];
  while (htmp && strcmp(name,htmp->name) != 0) {
    htmp = htmp->next;
  }
  if (htmp) {
    return htmp;
  }
  if (strcmp(name,".") == 0) {
    dot.value = pc;
    return &dot;
  }
  return NULL;
}

add_entry(name, value, table)
char *name; 
long value;
struct hentry *table[]; {
  int bucket;
  if (lookup(name, table)) {
    printf("Error: line %ld: %s multiply defined\n",lineno,name);
    sexit(1);
  }
  htmp = (struct hentry *)xmalloc(sizeof(struct hentry));
  htmp->name = name;
  htmp->value = value;
  bucket = hash(name);
  htmp->next = table[bucket];
  table[bucket] = htmp;
  if (pass > 0) {
    htmp->chain = seglist;
    seglist = htmp;
  }
}

/*
 * Allocate `size` bytes of zero-filled memory (the storage comes from
 * calloc).  Never returns NULL: on failure it prints a fatal error and
 * exits through sexit().
 */
char *xmalloc(size)
int size; {
#ifdef MSDOS
#ifndef unix
  /* MSDOS builds route the calloc call below to farcalloc */
  char *farcalloc();
#define calloc farcalloc
#endif /* unix */
#endif
  char *rval, *calloc();
  if ((rval = calloc((long)size, 1L)) == NULL) {
    printf("Fatal error: line %ld: out of memory\n",lineno);
    sexit(1);
  }
  return rval;
}

/*
 * Error exit: close both files, delete the output file so that no
 * partial result is left behind, and terminate with `status`.
 */
sexit(status)
int status; {
  (void) fclose(in);
  (void) fclose(out);
  (void) unlink(outname); /* discard the incomplete output */
  exit(status);
}

/*
 * .org N: set the position counter to N.  No bytes are emitted.
 */
do_org() {
  expect(INTEGER);
  pc = (unsigned short) intval;
}

do_entry() {
  char c = 0;
  unsigned int nbytes;
  expect(INTEGER);
  if (intval < pc) {
    printf("error: line %ld: entry point overwritten\n",lineno);
    sexit(1);
  }
  nbytes = intval - pc;
  advance(nbytes);
  if (SECOND_PASS) {
    while (nbytes-- > 0) {
      fwrite((char *)&c, 1, 1, out);
    }
  }
}

/*
 * Operand fields:
 *
 * reg: true if arg is a normal register
 * segreg: true if arg is a segment register
 * accum: true if arg is AX or AL
 * stat: true if arg is a static memory location
 * mem: true if arg is a dynamic memory location (implies stat)
 * word: true if arg is word-sized
 * immed: true if arg is immediate data
 */

/*
 * xchg dest, src
 *
 * Encodings produced:
 *   xchg ax, reg16   -> 0x90 + reg        (one-byte form)
 *   xchg reg, mem    -> 0x86/0x87 modrm
 *   xchg mem, reg    -> 0x86/0x87 modrm
 * Any other operand combination is an error.
 *
 * Fixes over the previous version: the memory forms passed the operand
 * pointer to outbyte() instead of emitting the modrm byte with
 * outmodrm(), and the one-byte form accepted a byte register as its
 * second operand.
 */
do_xchg() {
  opsize = NONE;
  get_operand(dest);
  expect(',');
  get_operand(src);
  if (dest->accum && dest->size == WORD &&
      src->reg && src->size == WORD) {
    outbyte(0x90 + src->regnum);
    return;
  }
  if (dest->reg && src->mem) {
    outbyte(0x86 + (opsize == WORD));
    src->regnum = dest->regnum; /* register goes in the modrm reg field */
    outmodrm(src);
    return;
  }
  if (src->reg && dest->mem) {
    outbyte(0x86 + (opsize == WORD));
    dest->regnum = src->regnum;
    outmodrm(dest);
    return;
  }
  bad_token("xchg operands");
}

/*
 * Rotate and shift instructions: <op> dest, 1   or   <op> dest, cl
 *
 * bcode layout:
 *   bits 5-4: size code (BYTE, WORD or NONE)
 *   bits 2-0: value for the reg field of the modrm byte
 *
 * The opcode is 0xd0, plus 2 when the count is CL, plus 1 for a word
 * operand.
 */
do_rotshft(bcode)
unsigned char bcode; {
  unsigned char v;
  int by_one, by_cl;
  opsize = bcode >> 4;
  get_operand(dest);
  if (opsize == NONE) {
    bad_token("cannot determine operand size");
  }
  expect(',');
  get_operand(src);
  by_one = src->immed && src->val == 1;
  by_cl = src->reg && src->size == BYTE && src->regnum == 1;
  if (!by_one && !by_cl) {
    bad_token("expected 1 or CL");
  }
  v = by_one ? 0 : 2;
  outbyte(0xd0 + v + (opsize == WORD));
  dest->regnum = bcode & 0x7;
  outmodrm(dest);
}

/*
 * lea / lds / les: <op> reg16, mem
 * bcode: the opcode byte to emit.
 * The destination must be a word register and the source a memory
 * operand; the register number goes in the modrm reg field.
 */
do_ea(bcode)
unsigned char bcode; {
  int ok;
  opsize = NONE;
  get_operand(dest);
  expect(',');
  get_operand(src);
  ok = src->mem && dest->reg && dest->size == WORD;
  if (!ok) {
    bad_token("bad address or register");
  }
  outbyte(bcode);
  src->regnum = dest->regnum;
  outmodrm(src);
}

/*
 * One-operand group instructions (div, idiv, imul, mul, neg, not):
 * opcode 0xf6 (byte) or 0xf7 (word) followed by a modrm byte whose reg
 * field selects the operation.
 *
 * bcode bits:
 *   bits 5-4: size code (BYTE, WORD or NONE)
 *   bits 2-0: regnum for modrm byte
 *
 * The reg field is masked to bits 2-0, as documented above and as
 * do_rotshft does; the previous 0xf mask would have let bit 3 spill
 * into the mod bits of the modrm byte.
 */
optype2(bcode)
unsigned char bcode; {
  opsize = bcode >> 4;
  get_operand(dest);
  if (opsize == NONE) {
    bad_token("cannot determine operand size");
  }
  outbyte(opsize == WORD ? 0xf7 : 0xf6);
  dest->regnum = bcode & 0x7;
  outmodrm(dest);
}

/*
 * seg <segreg>: emit a segment override prefix, 0x26 plus the segment
 * register number shifted into bits 4-3.
 */
do_seg() {
  opsize = NONE;
  get_operand(dest);
  if (!dest->segreg) {
    bad_token("segment override");
  }
  else {
    outbyte(0x26 + ((unsigned int)dest->regnum << 3));
  }
}

/*
 * in / out.
 * inp: non-zero for "in accum, port", zero for "out port, accum".
 * The port is either the identifier dx (opcodes 0xec/0xee) or a byte
 * constant (opcodes 0xe4/0xe6 followed by the port number).  One is
 * added to the opcode when get_accum() reports a word accumulator.
 */
do_inout(inp)
unsigned char inp; {
  int tok, port = 0, via_dx;
  if (inp) {
    opsize = get_accum();
    expect(',');
    tok = gettoken();
    if (tok != IDENTIFIER) {
      ungettoken(tok);
      get_byte();
      outbyte(0xe4 + (opsize == WORD));
      outbyte(intval);
    }
    else {
      if (strcmp(id_buffer,"dx") != 0) {
        bad_token("in operand");
      }
      outbyte(0xec + (opsize == WORD));
    }
  }
  else {
    tok = gettoken();
    via_dx = (tok == IDENTIFIER);
    if (via_dx) {
      if (strcmp(id_buffer,"dx") != 0) {
        bad_token("out operand");
      }
    }
    else {
      ungettoken(tok);
      port = get_byte();
    }
    expect(',');
    opsize = get_accum();
    if (via_dx) {
      outbyte(0xee + (opsize == WORD));
    }
    else {
      outbyte(0xe6 + (opsize == WORD));
      outbyte(port);
    }
  }
}

get_accum() {
  if (gettoken()== IDENTIFIER) {
    if (!strcmp(id_buffer,"al")) return BYTE;
    if (!strcmp(id_buffer,"ax")) return BYTE;
  }
  return bad_token("expected AX or AL");
}

/*
 * inc / dec.
 * decp: 0 for inc, 1 for dec.
 * A word register uses the one-byte form 0x40 + reg (+8 for dec).
 * Other registers and memory use 0xfe/0xff with decp in the modrm reg
 * field.
 */
do_incdec(decp)
unsigned char decp; {
  opsize = NONE;
  get_operand(dest);
  if (dest->reg && dest->size == WORD) {
    outbyte(0x40 + dest->regnum + (decp << 3));
  }
  else if (dest->mem || dest->reg) {
    outbyte(dest->size == WORD ? 0xff : 0xfe);
    dest->regnum = decp;
    outmodrm(dest);
  }
  else {
    bad_token("inc/dec operand");
  }
}

/*
 * ret / retf, optionally followed by an integer operand.
 * farp: non-zero for the far form.
 * With an operand: 0xc2 (near) or 0xca (far) plus the 16-bit value.
 * Without: 0xc3 (near) or 0xcb (far); the token read is pushed back.
 */
do_ret(farp)
unsigned char farp; {
  int tok;
  tok = gettoken();
  if (tok != INTEGER) {
    ungettoken(tok);
    outbyte(farp ? 0xcb : 0xc3);
  }
  else {
    if (!SRANGE(intval)) {
      bad_token("bad integer value");
    }
    outbyte(farp ? 0xca : 0xc2);
    outshort(intval);
  }
}

/* call/jmp code:
 *    bit 0: true if this is a jmp (otherwise call assumed)
 *    bit 4: true if short jmp (intra-segment direct short)
 *    bit 8: true if this is a far (inter-segment) operation
 *
 * Forms assembled:
 *   direct far   (callf/jmpf seg:off) -> 0x9a/0xea, offset, segment
 *   direct near  (call/jmp label)     -> 0xe8/0xe9, 16-bit displacement
 *   direct short (jmps label)         -> 0xeb, 8-bit displacement
 *   indirect     (memory operand)     -> 0xff with /2../5 in modrm
 *
 * Fixes over the previous version:
 *  - the short-jump range is tested on the real displacement (target
 *    minus the address after the two-byte instruction); it used to test
 *    the target against pc-128..pc+127, which is off by two;
 *  - the "could use jmps" hint had a condition that could never be
 *    true, printed an unsigned long with %d and lacked a newline.
 *
 * NOTE(review): assumes pc is advanced by each outbyte() call, as the
 * original displacement arithmetic implies -- confirm against advance().
 */
do_jmpcall(jcode)
unsigned short jcode; {
  unsigned char isjmp;
  unsigned char farp;
  long disp;
  isjmp = (jcode << 1) & 0x2; /* 0 or 2: added to the modrm reg field below */
  farp = (jcode >> 8) & 0x1;
  opsize = WORD;
  get_operand(dest);
  if (dest->immed) {
    if (farp) {
      expect(':');
      get_operand(src);
      if (!src->immed) {
        bad_token("callf/jmpf operands");
      }
/* direct: the offset (after the colon) is emitted before the segment */
      outbyte(isjmp ? 0xea : 0x9a);
      outshort(src->val);
      outshort(dest->val);
      return;
    }
    if (!isjmp) {
      outbyte(0xe8);
      outshort(dest->val - (pc+2));
      return;
    }
    /* displacement a two-byte short jump at the current pc would need */
    disp = (long)dest->val - ((long)pc + 2);
    if ((jcode & 0x10)) {
      if (SECOND_PASS && (disp < -128 || disp > 127)) {
        bad_token("too far for short jmp");
      }
      outbyte(0xeb);
      outbyte((int)disp);
      return;
    }
    if (SECOND_PASS && disp >= -128 && disp <= 127) {
      printf("line %ld: could use jmps\n",lineno);
    }
    outbyte(0xe9);
    outshort(dest->val - (pc+2));
    return;
  }
  if (dest->mem) {
/* indirect: /2 near call, /3 far call, /4 near jmp, /5 far jmp */
      outbyte(0xff);
      dest->regnum = (farp ? 3 : 2) + isjmp;
      outmodrm(dest);
      return;
  }
  if (farp) {
    bad_token("callf/jmpf operands");
  }
  else {
    bad_token("call/jmp operands");
  }
}

/*
 * int N: interrupt 3 uses the one-byte opcode 0xcc; every other number
 * is emitted as 0xcd followed by the number.
 */
do_int() {
  get_byte();
  if (intval != 3) {
    outbyte(0xcd);
    outbyte(intval);
  }
  else {
    outbyte(0xcc);
  }
}

/*
 * Read an integer token that must pass the BRANGE byte-range test.
 * Returns the value, which is also left in intval.
 */
get_byte() {
  int in_range;
  expect(INTEGER);
  in_range = BRANGE(intval);
  if (!in_range) {
    bad_token("expected byte");
  }
  return intval;
}

/*
 * Short control-transfer instruction (conditional jumps, loop*, jcxz).
 * bcode: the opcode byte; it is followed by an 8-bit displacement
 * relative to the address after the two-byte instruction.
 *
 * The range test is made on that displacement.  It used to compare the
 * target against pc-128..pc+127, which is off by two: targets two bytes
 * too far back were accepted and wrapped, and the two farthest forward
 * targets were rejected.
 *
 * NOTE(review): assumes pc is advanced by each outbyte() call, as the
 * original displacement arithmetic implies -- confirm against advance().
 */
do_ctis(bcode)
int bcode; {
  long disp;
  opsize = NONE;
  get_operand(dest);
  if (!dest->immed) {
    bad_token("expected label");
  }
  disp = (long)dest->val - ((long)pc + 2);
  if (SECOND_PASS && (disp < -128 || disp > 127)) {
    bad_token("too far for short jump");
  }
  outbyte(bcode);
  outbyte((int)disp);
}

/*
 * test dest, src
 * bcode: size code (NONE, BYTE or WORD) from the mnemonic suffix.
 *
 * Forms assembled:
 *   accumulator, immediate -> 0xa8/0xa9 imm
 *   reg/mem, immediate     -> 0xf6/0xf7 /0 imm
 *   reg/mem, reg           -> 0x84/0x85 modrm
 *   reg, mem               -> 0x84/0x85 modrm
 *
 * An immediate source with a destination that is neither a register nor
 * memory is now rejected; previously only the bare immediate bytes were
 * emitted, with no opcode.
 */
do_test(bcode)
unsigned char bcode; { 
  opsize = bcode;
  get_operand(dest);
  expect(',');
  get_operand(src);
  if (opsize == NONE) {
    bad_token("cannot determine operand size");
  }
  if (src->immed) {
    if (dest->accum) {
/* immediate operand to accumulator */
      outbyte(0xa8 + (opsize == WORD));
    }
    else if (dest->reg || dest->mem) {
/* immediate operand to memory or register */
      outbyte(opsize == WORD ? 0xf7 : 0xf6);
      dest->regnum = 0;
      outmodrm(dest);
    }
    else {
      bad_token("test operands");
    }
    outbyte(src->val);
    if (opsize == WORD) {
      outbyte(src->val >> 8);
    }
    return;
  }
  if (src->reg) {
/* register to register or memory */
    if (dest->reg || dest->mem) {
      outbyte(0x84 + (opsize == WORD));
      dest->regnum = src->regnum;
      outmodrm(dest);
      return;
    }
  }
  if (src->mem && dest->reg) {
/* memory to register */
    outbyte(0x84 + (opsize == WORD));
    src->regnum = dest->regnum;
    outmodrm(src);
    return;
  }
  bad_token("test operands");
}

/*
 * Two-operand arithmetic/logic instructions
 * (add, and, cmp, or, xor, adc, sbb, sub).
 *
 * bcode bits:
 *   bits 5-4: size code (BYTE, WORD or NONE)
 *   bits 3-1: regnum for modrm byte
 *   bit    0: sign-extend bit
 *
 * Fixes over the previous version:
 *  - the accumulator-immediate opcode is built from bits 3-1 only; the
 *    size bits of bcode used to be shifted into the opcode, corrupting
 *    it for the suffixed mnemonics;
 *  - the sign-extended byte form (0x83) is used only when sign-extending
 *    the low byte reproduces the 16-bit value; it used to be chosen for
 *    0x80..0xff as well;
 *  - an immediate source with a destination that is neither a register
 *    nor memory is rejected instead of emitting bare immediate bytes.
 *
 * NOTE(review): an immediate that is a forward label has the value 0 on
 * the first pass, so the 0x83/0x81 choice may differ between passes and
 * change the instruction length -- not addressed here.
 */
optype1(bcode)
unsigned short bcode; { /* add, and, cmp, or, xor, adc, sbb, sub */
  unsigned long imm16;
  opsize = bcode >> 4;
  get_operand(dest);
  expect(',');
  get_operand(src);
  if (opsize == NONE) {
    bad_token("cannot determine operand size");
  }
  if (src->immed) {
    if (dest->accum) {
/* immediate operand to accumulator */
      outbyte((((bcode & 0xe) | 01) << 2) + (opsize == WORD));
    }
    else if (dest->reg || dest->mem) {
/* immediate operand to memory or register */
      if (opsize == WORD) {
        imm16 = (unsigned long)src->val & 0xffffUL;
        if ((bcode & 0x1) && (imm16 <= 0x7f || imm16 >= 0xff80)) {
          outbyte(0x83);
          opsize = BYTE; /* only one immediate byte follows */
        }
        else {
          outbyte(0x81);
        }
      }
      else {
        outbyte(0x80);
      }
      dest->regnum = (bcode >> 1) & 0x7;
      outmodrm(dest);
    }
    else {
      bad_token("optype1 operands");
    }
    outbyte(src->val);
    if (opsize == WORD) {
      outbyte(src->val >> 8);
    }
    return;
  }
  if (src->reg) {
/* register to register or memory */
    if (dest->reg || dest->mem) {
      outbyte(((bcode & 0xe) << 2) + (opsize == WORD));
      dest->regnum = src->regnum;
      outmodrm(dest);
      return;
    }
  }
  if (src->mem && dest->reg) {
/* memory to register */
    outbyte(((bcode & 0xe) << 2) + (opsize == WORD ? 3 : 2));
    src->regnum = dest->regnum;
    outmodrm(src);
    return;
  }
  bad_token("optype1 operands");
}

/*
 * push / pop.
 * ispop: non-zero for pop.
 *
 * Forms assembled:
 *   segment register -> 0x06/0x07 + (segreg << 3); popping CS is refused
 *   register         -> 0x50/0x58 + reg
 *   memory           -> push: 0xff /6, pop: 0x8f /0
 *
 * Fixes over the previous version: a memory operand always produced the
 * pop encoding (0x8f /0), even for push, and an unsupported operand
 * silently produced no code at all.
 */
do_pushpop(ispop)
int ispop; {
  opsize = WORD;
  get_operand(src);
  if (src->segreg) {
    if (ispop && src->regnum == 1) {
      bad_token("cannot pop CS");
    }
    outbyte((ispop ? 0x7 : 0x6) | (unsigned int)src->regnum << 3);
    return;
  }
  if (src->reg) {
    outbyte((ispop ? 0x58 : 0x50) | (unsigned int)src->regnum);
    return;
  }
  if (src->mem) {
    if (ispop) {
      outbyte(0x8f);
      src->regnum = 0;
    }
    else {
      outbyte(0xff);
      src->regnum = 6;
    }
    outmodrm(src);
    return;
  }
  bad_token("push/pop operand");
}

/*
 * mov dest, src
 * mtype: size code (NONE, BYTE or WORD) from the mnemonic suffix.
 *
 * Forms assembled, in the order they are tried:
 *   reg/mem <- segment register        0x8c modrm
 *   static memory <- accumulator       0xa2/0xa3 addr16
 *   segment register (not CS) <- r/m   0x8e modrm
 *   accumulator <- static memory       0xa0/0xa1 addr16
 *   register <- immediate              0xb0+reg / 0xb8+reg imm
 *   memory <- immediate                0xc6/0xc7 modrm imm
 *   reg/mem <- register                0x88/0x89 modrm
 *   register <- memory                 0x8a/0x8b modrm
 * Anything else is an error.
 *
 * The two segment-register forms now require the other operand to be a
 * register or memory operand; they used to encode any operand (for
 * example an immediate) as if it were a memory reference.
 * NOTE(review): "stat" is accepted alongside "mem" in those checks in
 * case get_ea() marks static operands with stat only -- confirm.
 */
do_mov(mtype)
int mtype; {
  opsize = mtype;
  get_operand(dest);
  expect(',');
  get_operand(src);
  if (opsize == NONE) {
    bad_token("cannot determine operand size");
  }
  
  if (src->segreg && (dest->reg || dest->mem || dest->stat)) {
/* seg reg to mem/reg */
    outbyte(0x8c);
    dest->regnum = src->regnum;
    outmodrm(dest);
    return;
  }
  if (src->accum && dest->stat) {
/* accumulator to memory operand */
    outbyte((opsize == WORD) ? 0xa3 : 0xa2);
    outbyte(dest->val);
    outbyte(dest->val >> 8);
    return;
  }
  if (dest->segreg && dest->regnum != 1 &&
      (src->reg || src->mem || src->stat)) {
/* mem/reg to seg reg */
    outbyte(0x8e);
    src->regnum = dest->regnum;
    outmodrm(src);
    return;
  }
  if (dest->accum && src->stat) {
/* memory operand to accumulator */
    outbyte((opsize == WORD) ? 0xa1 : 0xa0);
    outbyte(src->val);
    outbyte(src->val >> 8);
    return;
  }
  if (src->immed) {
    if (dest->reg) {
/* immediate operand to register */
      if (opsize == WORD) {
        outbyte(0xb8 + dest->regnum);
        outbyte(src->val);
        outbyte(src->val >> 8);
        return;
      }
      outbyte(0xb0 + dest->regnum);
      outbyte(src->val);
      return;
    }
    if (dest->mem) {
/* immediate operand to memory */
      outbyte(opsize == WORD ? 0xc7 : 0xc6);
      outmodrm(dest);
      outbyte(src->val);
      if (opsize == WORD) {
        outbyte(src->val >> 8);
      }
      return;
    }
  }
  if (src->reg) {
/* register to register or memory */
    if (dest->reg || dest->mem) {
      outbyte(opsize == WORD ? 0x89 : 0x88);
      dest->regnum = src->regnum;
      outmodrm(dest);
      return;
    }
  }
  if (src->mem && dest->reg) {
/* memory to register */
    outbyte(opsize == WORD ? 0x8b : 0x8a);
    src->regnum = dest->regnum;
    outmodrm(src);
    return;
  }
  bad_token("mov operands");
}

/*
 * Account for one output byte with advance(1) and, on the second pass,
 * write `b` to the output file.
 */
outbyte(b)
unsigned char b; {
  advance(1);
  if (SECOND_PASS) {
    fwrite((char *)&b, 1, 1, out);
  }
}

/*
 * Emit the 16-bit value `s` as two bytes, low byte first.
 */
outshort(s)
unsigned short s; {
  outbyte(s & 0xff);
  outbyte((s >> 8) & 0xff);
}

/*
 * Emit the modrm byte for operand `op` (mod in bits 7-6, regnum in bits
 * 5-3, rm in bits 2-0) followed by its displacement, if any:
 *   mod 1            -> one displacement byte
 *   mod 2            -> two displacement bytes, low byte first
 *   mod 0 with rm 6  -> two displacement bytes, low byte first
 * The displacement is taken from op->val.
 */
outmodrm(op)
struct operand *op; {
  unsigned int key;
  unsigned char byte;

  byte = (unsigned int)op->mod << 6 | (unsigned int)op->regnum << 3 |
        op->rm;
  outbyte(byte);

  /* mod and rm folded into one number to select the displacement size */
  key = ((unsigned int)op->mod << 3) + op->rm;
  if (key >= 0x08 && key <= 0x0f) {
    outbyte(op->val);
  }
  else if (key == 0x06 || (key >= 0x10 && key <= 0x17)) {
    outbyte(op->val);
    outbyte(op->val >> 8);
  }
}

get_operand(op)
struct operand *op; {
  int i;
  long val = 0L;
  bzero(op,sizeof(struct operand));
  switch(i = gettoken()) {
  case IDENTIFIER:
    if (!(htmp = lookup(id_buffer, symtable))) {
      if (SECOND_PASS) {
        printf("Error: line %ld: undefined: %s\n",lineno,id_buffer);
        sexit(1);
      }
    }
    else op->val = val = htmp->value;
    if (val < 0) { /* register or segment register */
      switch((int)val) {
      case AX: op->accum = 1;
/* fall-through */
      case CX: case DX: case BX: case SP: case BP:
      case SI: case DI:
        op->size = WORD;
        op->reg = 1;
        op->mod = 3;
        op->rm = op->regnum = -(val+1);
      break;
      case AL: op->accum = 1;
/* fall-through */
      case CL: case DL: case BL: case AH: case CH:
      case DH: case BH:
        op->size = BYTE;
        op->reg = 1; 
        op->mod = 3;
        op->rm = op->regnum = -(val+9);
      break;
      case ES: case CS: case SS: case DS:
        op->size = WORD;
        op->segreg = 1;
        op->regnum = -(val+17);
      break;
      default: bad_token("get_operand: bad register");
      }
    }
    else { /* assume a label location */
      op->immed = 1;
    }
  break;
  case INTEGER:
    if (!SRANGE(intval) || 
        (opsize == BYTE && !BRANGE(intval))) {
      unsigned short mask = 0xffff;
      if (opsize == BYTE) {
        mask = 0xff;
      }
      printf("warning: line %ld: integer 0x%lx truncated to 0x%x\n",
        lineno,intval,intval&mask);
      intval &= mask;
    }
    op->immed = 1; op->val = intval;
  break;
  case '[':
    get_ea(op);
  break;
  default:
    bad_token("expected operand");
  break;
  }
/*
 * check operand size here
 */
  if (opsize == NONE) {
    opsize = op->size;
  }
  else if (op->size && op->size != opsize) {
    bad_token("operand size mismatch");
  }
}

/*
 * True when val equals one of the register symbol values BX, DI, SI or
 * BP -- the only registers get_ea() and do_reg_ea() accept inside
 * brackets.  Despite the name this tests for an addressing register,
 * not a displacement.  The argument is evaluated up to four times.
 */
#define IS_DISP(val) ((val) == BX || (val) == DI || (val) == SI || (val) == BP)

get_ea(op)
struct operand *op; {
  int i;
  long val = 0L;
  op->mem = 1;
  op->val = 0;
  switch(gettoken()) {
  case IDENTIFIER:
    if (!(htmp = lookup(id_buffer, symtable))) {
      if (SECOND_PASS) {
        printf("Error: line %ld: undefined: %s\n",lineno,id_buffer);
        sexit(1);
      }
    }
    else op->val = val = intval = htmp->value;
    if (val < 0) {
      if (!IS_DISP(val)) {
        bad_token("bad register for effective address");
      }
      do_reg_ea(op, -(val+1));
      break;
    }
/* fall-through */
  case INTEGER:
    if (!SRANGE(intval)) {
      printf("warning: line %ld: integer 0x%lx truncated to 0x%x\n",
        lineno,intval,intval&0xffff);
      intval &= 0xffff;
    }
    op->mem = op->stat = 1;
    op->val = intval;
    op->mod = 0;
    op->rm = 6;
    while ((i = gettoken()) != ']') {
      if (i == '+') {
        expect(INTEGER);
        if (!SRANGE(intval)) {
          printf("warning: line %ld: integer 0x%lx truncated to 0x%x\n",
            lineno,intval,intval&0xffff);
          intval &= 0xffff;
        }
        op->val += intval;
      }
      else bad_token("effective address");
    }

  break;
  default: bad_token("effective address");
  break;
  }
}

/*
 * Parse the remainder of a register-based effective address, up to and
 * including the closing ']', and fill in op->mod, op->rm and op->val.
 *
 * val is the index (0..7, as computed by get_ea() with -(value+1)) of
 * the register that has already been read.  Further terms, separated
 * by '+', may be one more register accepted by IS_DISP and at most one
 * non-zero displacement (an integer or a symbol with a non-negative
 * value).
 *
 * Each register seen sets bit (1 << index) in regmask; the final mask
 * selects rm.  The displacement selects mod: 0 for none, 1 for a value
 * that fits a sign-extended byte, 2 for a 16-bit value.
 */
do_reg_ea(op, val)
struct operand *op; 
long val; {
  unsigned short regmask;
  int i;
  unsigned char gotdisp = NONE;
  regmask = 1 << (int)val;
  op->mem = 1;
  while ((i = gettoken()) != ']') {
reswitch:
    switch(i) {
    case '+':
/* a '+' is only a separator: re-dispatch on the token that follows it */
      i = gettoken();
      goto reswitch;
    case IDENTIFIER:
      if (!(htmp = lookup(id_buffer, symtable))) {
        bad_token("offset register syntax");
      }
      if ((intval = htmp->value) < 0) {
        if (!IS_DISP(htmp->value)) {
          bad_token("offset register syntax");
        }
/*
 * Register index must be -(value+1), the same mapping get_ea() uses for
 * the first register, so that the bit matches the table below.  The
 * previous form -(value)+1 produced an index two too large.
 */
        i = -((int)htmp->value + 1);
        if (regmask & (1 << i)) {
          bad_token("offset register syntax");
        }
        regmask |= (1 << i);
        break;
      }
/* fall_through: a non-negative symbol value is used as a displacement */
    case INTEGER:
      if (gotdisp) {
        bad_token("offset displacement syntax");
      }
      gotdisp = WORD;
      if (!SRANGE(intval)) {
/* arguments are converted so that each matches its conversion specifier */
        printf("warning: line %lu: integer 0x%lx truncated to 0x%lx\n",
          lineno,(unsigned long)intval,(unsigned long)(intval&0xffff));
        intval &= 0xffff;
      }
      else if (intval >= -128 && intval <= 127) {
/*
 * A one-byte displacement is sign-extended by the processor, so only
 * -128..127 may use it; 128..255 must be emitted as a full word.
 */
        gotdisp = BYTE;
      }
      op->val = intval;
      if (op->val == 0) {
        gotdisp = NONE;
      }
    break;
    default:
      bad_token("register effective address");
    break;
    }
  }

/*
 * [BP] alone has no displacement-free encoding: mod 0 with rm 6 means a
 * direct 16-bit address.  Encode it as [BP+0] with a one-byte
 * displacement.  op->val is cleared because the caller may have left a
 * register symbol value in it.
 */
  if (regmask == 0x20 && gotdisp == NONE) {
    gotdisp = BYTE;
    op->val = 0;
  }

  switch (gotdisp) {
  case NONE:
    op->mod = 0;
  break;
  case BYTE:
    op->mod = 1;
  break;
  case WORD:
    op->mod = 2;
  break;
  }

  switch(regmask) {
  case 0x8: /* [BX], [BX+disp] */
    op->rm = 7;
  break;
  case 0x20: /* [BP], [BP+disp] */
    op->rm = 6;
  break;
  case 0x80: /* [DI], [DI+disp] */
    op->rm = 5;
  break;
  case 0x40: /* [SI], [SI+disp] */
    op->rm = 4;
  break;
  case 0xa0: /* [BP+DI], [BP+DI+disp] */
    op->rm = 3;
  break;
  case 0x60: /* [BP+SI], [BP+SI+disp] */
    op->rm = 2;
  break;
  case 0x88: /* [BX+DI], [BX+DI+disp] */
    op->rm = 1;
  break;
  case 0x48: /* [BX+SI], [BX+SI+disp] */
    op->rm = 0;
  break;
  default:
    bad_token("register effective address");
  break;
  }
}

/*
 * Move the position counter pc forward by count bytes.  bad_token() is
 * called first when the resulting position would exceed 0x10000.
 */
advance(count)
int count; {
  long next_pc = (long)pc + count;

  if (next_pc > 0x10000L) {
    bad_token("Code segment overflow");
  }
  pc += count;
}
