/* iso2asc -- a simple, but powerful tool that allows you to convert
              text files coded using the 8-bit character set ISO 8859-1
              as readable as possible to 7-bit ASCII files and a few
              other character sets.

   This C program should compile easily without modifications with
   almost any C compiler (old K&R, ISO C and C++) under almost
   any operating system. The manual is printed when the program is
   started without command line arguments.

   Author: Markus Kuhn, University of Erlangen, Germany
           <mskuhn@immd4.uni-erlangen.de>

   1993-11-02

   Feel free to redistribute this software.                              */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define ISO_TABLES 7       /* number of conversion tables in iso2asc[] */
#define BUF_LENGTH 3000    /* size of the input buffer handed to fgets() */

/* Input chunk most recently read from stdin by main(). */
char inbuf[BUF_LENGTH];
/* Converted text for that chunk. It is four times as large as inbuf because
   the longest replacement strings in the built-in tables are four
   characters long. */
char outbuf[4*BUF_LENGTH];

/* Write the built-in manual to stderr and terminate the program with
   exit status 1. This function does not return. */
  void
usage()
{
  /* manual text in front of the line that contains the table count */
  static char *intro[] = {
    "iso2asc V1.1 -- Markus Kuhn\n",
    "\n",
    "Usage: iso2asc <tablenr.> {<codenr.>=<replacement>} [-s] [-l]\n",
    "\n",
    "Reads a text file from standard input encoded with the 8-bit character set\n",
    "ISO 8859-1. Standard output is the text file with non-ASCII characters replaced\n",
    "by ASCII characters as readable as possible. All users can't be satisfied with\n",
    NULL
  };
  /* manual text behind the line that contains the table count */
  static char *rest[] = {
    "\n",
    "   0  universal table for many languages (simply remove all accents)\n",
    "   1  monospaced version of table 0\n",
    "   2  table for Danish, Dutch, German, Norwegian and Swedish (a+\" -> ae, etc.)\n",
    "   3  table for Danish, Finnish, Norwegian and Swedish using\n",
    "      the appropriate ISO 646 variant of ASCII (o+\" -> |, etc.)\n",
    "   4  table with RFC 1345 codes in brackets (e.g. e+^ -> [e>], etc.)\n",
    "   5  table for printers that allow overstriking with backspace\n",
    "   6  IBM PC character set (code page 437) output\n",
    "\n",
    "Modify any of the tables by adding command line arguments like e.g. '169=(C)',\n",
    "where 169 is the character number of the Latin 1 copyright sign and '(C)' is\n",
    "your replacement. With 'SUB=_', you can specify that you prefer '_' instead of\n",
    "'?' if no reasonable replacement is possible. Option '-s' avoids spaces being\n",
    "removed for column correction and -l prints the table to standard output.\n",
    NULL
  };
  char **line;

  for (line = intro; *line != NULL; line++)
    fputs(*line, stderr);

  /* the only line that needs formatting: it reports the number of tables */
  fprintf(stderr,
          "one single transcription table, so select one of the following %d tables:\n",
          ISO_TABLES);

  for (line = rest; *line != NULL; line++)
    fputs(*line, stderr);

  exit(1);
}

/* Conversion tables for displaying the G1 set (0xa0-0xff) of
   ISO Latin 1 (ISO 8859-1) with 7-bit ASCII characters.

   Version 1.2 -- error corrections are welcome

   Table   Purpose
     0     universal table for many languages
     1     single-spacing universal table
     2     table for Danish, Dutch, German, Norwegian and Swedish
     3     table for Danish, Finnish, Norwegian and Swedish using
           the appropriate ISO 646 variant.
     4     table with RFC 1345 codes in brackets
     5     table for printers that allow overstriking with backspace
     6     table for IBM PC character set (code page 437)

   Markus Kuhn <mskuhn@immd4.informatik.uni-erlangen.de>                 */

#define SUB NULL       /* used if no reasonable ASCII string is possible */

/* iso2asc[t][c - 0xa0] is the replacement string that table t supplies for
   the ISO 8859-1 character code c (0xa0 <= c <= 0xff), so every table has
   96 entries. Entries equal to SUB are placeholders: main() overwrites
   them with the string given in a 'SUB=' argument or with "?" before any
   text is converted. The array is writable because main() also stores
   pointers into the command line arguments here. The longest strings in
   the built-in tables are four characters long (e.g. " 1/4", "[NS]"). */
static char *iso2asc[ISO_TABLES][96] = {{   /* table 0: universal */
  " ","!","c",SUB,SUB,"Y","|",SUB,"\"","(c)","a","<<","-","-","(R)","-",
  " ","+/-","2","3","'","u","P",".",",","1","o",">>"," 1/4"," 1/2"," 3/4","?",
  "A","A","A","A","A","A","AE","C","E","E","E","E","I","I","I","I",
  "D","N","O","O","O","O","O","x","O","U","U","U","U","Y","Th","ss",
  "a","a","a","a","a","a","ae","c","e","e","e","e","i","i","i","i",
  "d","n","o","o","o","o","o",":","o","u","u","u","u","y","th","y"
},{   /* table 1: single-spacing universal table */
  " ","!","c",SUB,SUB,"Y","|",SUB,"\"","c","a","<","-","-","R","-",
  " ",SUB,"2","3","'","u","P",".",",","1","o",">",SUB,SUB,SUB,"?",
  "A","A","A","A","A","A","A","C","E","E","E","E","I","I","I","I",
  "D","N","O","O","O","O","O","x","O","U","U","U","U","Y","T","s",
  "a","a","a","a","a","a","a","c","e","e","e","e","i","i","i","i",
  "d","n","o","o","o","o","o",":","o","u","u","u","u","y","t","y"
},{   /* table 2: Danish, Dutch, German, Norwegian and Swedish */
  " ","!","c",SUB,SUB,"Y","|",SUB,"\"","(c)","a","<<","-","-","(R)","-",
  " ","+/-","2","3","'","u","P",".",",","1","o",">>"," 1/4"," 1/2"," 3/4","?",
  "A","A","A","A","Ae","Aa","AE","C","E","E","E","E","I","I","I","I",
  "D","N","O","O","O","O","Oe","x","Oe","U","U","U","Ue","Y","Th","ss",
  "a","a","a","a","ae","aa","ae","c","e","e","e","e","i","i","i","i",
  "d","n","o","o","o","o","oe",":","oe","u","u","u","ue","y","th","ij"
},{   /* table 3: Danish, Finnish, Norwegian and Swedish, ISO 646 variant */
  " ","!","c",SUB,"$","Y","|",SUB,"\"","(c)","a","<<","-","-","(R)","-",
  " ","+/-","2","3","'","u","P",".",",","1","o",">>"," 1/4"," 1/2"," 3/4","?",
  "A","A","A","A","[","]","[","C","E","@","E","E","I","I","I","I",
  "D","N","O","O","O","O","\\","x","\\","U","U","U","^","Y","Th","ss",
  "a","a","a","a","{","}","{","c","e","`","e","e","i","i","i","i",
  "d","n","o","o","o","o","|",":","|","u","u","u","~","y","th","y"
},{   /* table 4: RFC 1345 codes in brackets */
  "[NS]","[!I]","[Ct]","[Pd]","[Cu]","[Ye]","[BB]","[SE]",
  "[':]","[Co]","[-a]","[<<]","[NO]","[--]","[Rg]","['-]",
  "[DG]","[+-]","[2S]","[3S]","['']","[My]","[PI]","[.M]",
  "[',]","[1S]","[-o]","[>>]","[14]","[12]","[34]","[?I]",
  "[A!]","[A']","[A>]","[A?]","[A:]","[AA]","[AE]","[C,]",
  "[E!]","[E']","[E>]","[E:]","[I!]","[I']","[I>]","[I:]",
  "[D-]","[N?]","[O!]","[O']","[O>]","[O?]","[O:]","[*X]",
  "[O/]","[U!]","[U']","[U>]","[U:]","[Y']","[TH]","[ss]",
  "[a!]","[a']","[a>]","[a?]","[a:]","[aa]","[ae]","[c,]",
  "[e!]","[e']","[e>]","[e:]","[i!]","[i']","[i>]","[i:]",
  "[d-]","[n?]","[o!]","[o']","[o>]","[o?]","[o:]","[-:]",
  "[o/]","[u!]","[u']","[u>]","[u:]","[y']","[th]","[y:]"
},{   /* table 5: overstriking with backspace ('\b') */
  " ","!","c\b|","L\b-","o\bX","Y\b=","|",SUB,
  "\"","(c)","a\b_","<<","-\b,","-","(R)","-",
  " ","+\b_","2","3","'","u","P",".",
  ",","1","o\b_",">>"," 1/4"," 1/2"," 3/4","?",
  "A\b`","A\b'","A\b^","A\b~","A\b\"","Aa","AE","C\b,",
  "E\b`","E\b'","E\b^","E\b\"","I\b`","I\b'","I\b^","I\b\"",
  "D\b-","N\b~","O\b`","O\b'","O\b^","O\b~","O\b\"","x",
  "O\b/","U\b`","U\b'","U\b^","U\b\"","Y\b'","Th","ss",
  "a\b`","a\b'","a\b^","a\b~","a\b\"","aa","ae","c\b,",
  "e\b`","e\b'","e\b^","e\b\"","i\b`","i\b'","i\b^","i\b\"",
  "d\b-","n\b~","o\b`","o\b'","o\b^","o\b~","o\b\"","-\b:",
  "o\b/","u\b`","u\b'","u\b^","u\b\"","y\b'","th","y\b\""
},{   /* table 6: IBM PC character set (code page 437), octal escapes */
  "\377","\255","\233","\234",SUB,"\235","|","\25",
  "\"","(c)","\246","\256","\252","-","(R)","-",
  "\370","\361","\375","3","'","\346","\24","\371",
  ",","1","\247","\257","\254","\253"," 3/4","\250",
  "A","A","A","A","\216","\217","\222","\200",
  "E","\220","E","E","I","I","I","I",
  "D","\245","O","O","O","O","\231","x",
  "\355","U","U","U","\232","Y","T","\341",
  "\205","\240","\203","a","\204","\206","\221","\207",
  "\212","\202","\210","\211","\215","\241","\214","\213",
  "d","\244","\225","\242","\223","o","\224","\366",
  "\355","\227","\243","\226","\201","y","t","\230"
}};


/*
 *  Transform an 8-bit ISO Latin 1 string iso into a 7-bit ASCII string asc
 *  readable on old terminals using conversion table t.
 *
 *  worst case: strlen(iso) == 4*strlen(asc)
 */
  void
Latin1toASCII(iso, asc, t)
  unsigned char *iso, *asc;
  int t;
{
  char *p, **tab;

  if (iso==NULL || asc==NULL) return;

  tab = iso2asc[t] - 0xa0;
  while (*iso) {
    if (*iso > 0x9f) {
      p = tab[*(iso++)];
      while (*p) *(asc++) = *(p++);
    } else {
      *(asc++) = *(iso++);
    }
  }
  *asc = 0;

  return;
}


/*
 *  Transform an 8-bit ISO Latin 1 string iso into a 7-bit ASCII string asc
 *  readable on old terminals using conversion table t. Remove SPACE and
 *  TAB characters where appropriate, in order to preserve the layout
 *  of tables, etc. as much as possible.
 *
 *  worst case: strlen(iso) == 4*strlen(asc)
 */
  void
CorLatin1toASCII(iso, asc, t)
  unsigned char *iso, *asc;
  int t;
{
  char *p, **tab;
  int first;   /* flag for first SPACE/TAB after other characters */
  int i, a;    /* column counters in iso and asc */

  /* TABSTOP(x) is the column of the character after the TAB
     at column x. First column is 0, of course.              */
# define TABSTOP(x) (((x) - ((x)&7)) + 8)

  if (iso==NULL || asc==NULL) return;

  tab = iso2asc[t] - 0xa0;
  first = 1;
  i = a = 0;
  while (*iso) {
    if (*iso > 0x9f) {
      p = tab[*(iso++)]; i++;
      first = 1;
      while (*p) { *(asc++) = *(p++); a++; }
    } else {
      if (a > i && ((*iso == ' ') || (*iso == '\t'))) {
        /* spaces or TABS should be removed */
        if (*iso == ' ') {
          /* only the first space after a letter must not be removed */
          if (first) { *(asc++) = ' '; a++; first = 0; }
          i++;
        } else {   /* here: *iso == '\t' */
          if (a >= TABSTOP(i)) {
            /* remove TAB or replace it with SPACE if necessary */
            if (first) { *(asc++) = ' '; a++; first = 0; }
          } else {
            /* TAB will correct the column difference */
            *(asc++) = '\t';   /* = *iso */
            a = TABSTOP(a);    /* = TABSTOP(i), because i < a < TABSTOP(i) */
          }
          i = TABSTOP(i);
        }
        iso++;
      } else {
        /* just copy the characters and advance the column counters */
        if (*iso == '\t') {
          a = i = TABSTOP(i);  /* = TABSTOP(a), because here a = i */
        } else if (*iso == '\b') {
          a--; i--;
        } else {
          a++; i++;
        }
        *(asc++) = *(iso++);
        first = 1;
      }
    }
  }
  *asc = 0;

  return;
}


  int
main(argc, argv)
  int argc;
  char **argv;
{
  /*
   * Command line: <tablenr.> {<codenr.>=<replacement>} [SUB=<string>]
   *               [-s] [-l]
   *
   * argv[1] must be a single digit selecting one of the ISO_TABLES
   * tables. The remaining arguments are processed from left to right:
   *   -s / -S         switch off the column correction
   *   -l / -L         print the resulting table to stdout and exit
   *   SUB=<string>    use <string> for all entries that are still SUB
   *   <code>=<string> use <string> for character <code> (160..255,
   *                   decimal, octal or hexadecimal as accepted by %i)
   * Any malformed argument prints the manual via usage().
   *
   * Afterwards stdin is converted to stdout in chunks of at most
   * BUF_LENGTH-1 characters. Returns 0 on success and 1 if reading or
   * writing failed.
   */
  int table;
  int corr = 1, list = 0;
  int i,j,code;
  int status = 0;
  size_t len, maxlen;   /* lengths of replacement strings */
  char *out;            /* output buffer actually used */

  if (argc < 2) usage();
  table = argv[1][0] - '0';
  if (table < 0 || table >= ISO_TABLES || argv[1][1] != '\0') usage();
  for (i = 2; i < argc; i++)
    if (argv[i][0] == '-' || argv[i][0] == '/')
      switch(argv[i][1]) {
      case 's':
      case 'S':
        corr = 0;
        break;
      case 'l':
      case 'L':
        list = 1;
        break;
      default:
        usage();
      }
    else {
      if ((argv[i][0] == 's' || argv[i][0] == 'S') &&
          (argv[i][1] == 'u' || argv[i][1] == 'U') &&
          (argv[i][2] == 'b' || argv[i][2] == 'B') &&
          argv[i][3] == '=')
        for (j = 0x00; j < 0x60; j++) {
          if (iso2asc[table][j] == SUB) iso2asc[table][j] = argv[i] + 4;
        }
      else {
        /* sscanf() returns 0 (not EOF) when the argument does not start
           with a number, and %n is only stored once the '=' has been
           matched. j stays -1 if the '=' is missing, so both failure
           modes are detected before code and j are used. */
        code = 0;
        j = -1;
        if (sscanf(argv[i], "%i=%n", &code, &j) != 1 || j < 0) usage();
        if (code < 160 || code > 255) usage();
        iso2asc[table][code - 160] = argv[i] + j;
      }
    }
  /* default SUB */
  for (j = 0x00; j < 0x60; j++)
    if (iso2asc[table][j] == SUB) iso2asc[table][j] = "?";

  if (list) {
    for (i = 0x00; i<0x60; i++)
      printf(((i & 15) == 15) ? "%4s\n" : "%4s ", iso2asc[table][i]);
    exit(0);
  }

  /* The conversion routines do not check the size of their output
     buffer. outbuf is large enough for the built-in tables, but a
     replacement given on the command line may be longer than four
     characters, so size the buffer for the longest entry in use. */
  maxlen = 0;
  for (j = 0x00; j < 0x60; j++) {
    len = strlen(iso2asc[table][j]);
    if (len > maxlen) maxlen = len;
  }
  out = outbuf;
  /* fgets() delivers at most BUF_LENGTH-1 characters per call */
  if (maxlen * (BUF_LENGTH - 1) + 1 > sizeof(outbuf)) {
    out = malloc(maxlen * (BUF_LENGTH - 1) + 1);
    if (out == NULL) {
      fprintf(stderr, "Out of memory in iso2asc\n");
      exit(1);
    }
  }

  while (fgets(inbuf, BUF_LENGTH, stdin) != NULL) {
    if (corr) CorLatin1toASCII((unsigned char *) inbuf,
                               (unsigned char *) out, table);
    else Latin1toASCII((unsigned char *) inbuf,
                       (unsigned char *) out, table);
    if (fputs(out, stdout) == EOF) {
      /* stop after the first failure instead of reporting it again
         for every remaining line */
      perror("Error while writing output in iso2asc");
      status = 1;
      break;
    }
  }
  if (ferror(stdin)) {
    perror("Error while reading input in iso2asc");
    status = 1;
  }
  /* buffered output may fail only when it is finally written */
  if (status == 0 && fflush(stdout) == EOF) {
    perror("Error while writing output in iso2asc");
    status = 1;
  }
  if (out != outbuf) free(out);
  return(status);
}
