/* Sample solution for Deep Magic with Lex & Yacc, IAP 1999, Problem 2.Extra */

%{
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Line counter: the scanner's newline rule increments it once per
   newline it matches.  It has no explicit initializer here, so it starts
   at zero.  */
int current_line;
/* NOTE(review): not referenced in the visible part of this file;
   presumably the name of the file being scanned -- confirm against the
   #line handling code.  */
char *input_file_name;

/* Token types, other than character values.

   Single-character operators and punctuators are returned as their own
   character code, so every named token must be numbered above 255 to
   stay distinct from them.  The values are consecutive from 256 to 313.  */
#define TOK_ID		256	/* identifier */
#define TOK_STRING	257	/* string literal */
#define TOK_AUTO	258	/* keywords... */
#define TOK_BREAK	259
#define TOK_CASE	260
#define TOK_CHAR	261
#define TOK_CONST	262
#define TOK_CONTINUE	263
#define TOK_DEFAULT	264
#define TOK_DO		265
#define TOK_DOUBLE	266
#define TOK_ELSE	267
#define TOK_ENUM	268
#define TOK_EXTERN	269
#define TOK_FLOAT	270
#define TOK_FOR		271
#define TOK_GOTO	272
#define TOK_IF		273
#define TOK_INT		274
#define TOK_LONG	275
#define TOK_REGISTER	276
#define TOK_RETURN	277
#define TOK_SHORT	278
#define TOK_SIGNED	279
#define TOK_SIZEOF	280
#define TOK_STATIC	281
#define TOK_STRUCT	282
#define TOK_SWITCH	283
#define TOK_TYPEDEF	284
#define TOK_UNION	285
#define TOK_UNSIGNED	286
#define TOK_VOID	287
#define TOK_VOLATILE	288
#define TOK_WHILE	289
#define TOK_CINTEGER	290	/* constant integer */
#define TOK_CFLOAT	291	/* constant floating point */
#define TOK_INCR	292	/* ++ */
#define TOK_DECR	293	/* -- */
#define TOK_LSHIFT	294	/* << */
#define TOK_RSHIFT	295	/* >> */
#define TOK_GTEQ	296	/* >= */
#define TOK_LTEQ	297	/* <= */
#define TOK_EQ		298	/* == */
#define TOK_NOTEQ	299	/* != */
#define TOK_LAND	300	/* && */
#define TOK_LOR		301	/* || */
#define TOK_TIMESASN	302	/* *= */
#define TOK_DIVIDEASN	303	/* /= */
#define TOK_MODASN	304	/* %= */
#define TOK_PLUSASN	305	/* += */
#define TOK_MINUSASN	306	/* -= */
#define TOK_LSHIFTASN	307	/* <<= */
#define TOK_RSHIFTASN	308	/* >>= */
#define TOK_ANDASN	309	/* &= */
#define TOK_ORASN	310	/* |= */
#define TOK_XORASN	311	/* ^= */
#define TOK_DOTS	312	/* ... */
#define TOK_POINTS	313	/* -> */

/* Value carried by a constant token.  */
struct constant
{
  int bits;			/* character constants store 8 here */
  union
    {
      long intval;		/* value when the constant is integral */
      long double floatval;	/* value when the constant is floating */
    } value;
};

/* Semantic value of the token most recently returned by the scanner.
   Which member is meaningful depends on the token type: idval is set for
   TOK_ID, stringval for TOK_STRING, and constval for TOK_CINTEGER
   character constants.  */
union token_value
{
  char *idval;
  struct constant *constval;
  char *stringval;
} yylval;

/* Prototypes for the helpers defined after the rules section.  The
   generated yylex() precedes those definitions, so without these the
   rule actions would call undeclared functions.
   NOTE(review): process_line_change, print_error, convert_integer and
   convert_float are also called but defined outside this file's visible
   text; their prototypes must come from wherever they are defined.  */
char *copy_string (char *s);
int generate_identifier_token (char *name);
int scan_backslash_escape (void);
int scan_string_literal (void);
int scan_character_constant (void);

%}

%%

[ \t\f\v\r]	;	/* skip blanks; they separate tokens only */

\n		current_line++;

	/* Match #line directives, in both the "# 12 "file"" and the
	   "#line 12 "file"" spellings.  Anything after the closing quote
	   up to and including the end of that same line is consumed.
	   NOTE(review): the newline eaten here does not go through the
	   \n rule above, so process_line_change is presumably responsible
	   for setting current_line -- confirm.  */
^#[ \t]+[0-9]+[ \t]+\"[^"\n]*\"[^\n]*\n		|
^#line[ \t]+[0-9]+[ \t]+\"[^"\n]*\"[^\n]*\n	process_line_change (yytext);

/* One character operators and punctuators */
[][().&*+~!/%^|?:=,{};-]	return *yytext;

/* Multi-character operators and the multi-character punctuator */
"++"		return TOK_INCR;
"--"		return TOK_DECR;
"->"		return TOK_POINTS;
"<<"		return TOK_LSHIFT;
">>"		return TOK_RSHIFT;
"<="		return TOK_LTEQ;
">="		return TOK_GTEQ;
"=="		return TOK_EQ;
"!="		return TOK_NOTEQ;
"&&"		return TOK_LAND;
"||"		return TOK_LOR;
"*="		return TOK_TIMESASN;
"/="		return TOK_DIVIDEASN;
"+="		return TOK_PLUSASN;
"-="		return TOK_MINUSASN;
"%="		return TOK_MODASN;
"<<="		return TOK_LSHIFTASN;
">>="		return TOK_RSHIFTASN;
"&="		return TOK_ANDASN;
"|="		return TOK_ORASN;
"..."		return TOK_DOTS;

/* Keywords---must precede the rule for identifiers below */
auto			return TOK_AUTO;
break			return TOK_BREAK;
case			return TOK_CASE;
char			return TOK_CHAR;
const			return TOK_CONST;
continue	        return TOK_CONTINUE;
default			return TOK_DEFAULT;
do			return TOK_DO;
double			return TOK_DOUBLE;
else			return TOK_ELSE;
enum			return TOK_ENUM;
extern			return TOK_EXTERN;
float			return TOK_FLOAT;
for			return TOK_FOR;
goto			return TOK_GOTO;
if			return TOK_IF;
int			return TOK_INT;
long			return TOK_LONG;
register		return TOK_REGISTER;
return			return TOK_RETURN;
short			return TOK_SHORT;
signed			return TOK_SIGNED;
sizeof			return TOK_SIZEOF;
static			return TOK_STATIC;
struct			return TOK_STRUCT;
switch			return TOK_SWITCH;
typedef			return TOK_TYPEDEF;
union			return TOK_UNION;
unsigned		return TOK_UNSIGNED;
void			return TOK_VOID;
volatile		return TOK_VOLATILE;
while			return TOK_WHILE;

/* Identifiers */
[a-zA-Z_][a-zA-Z_0-9]*	return generate_identifier_token (yytext);

/* String literals */
L?\"			return scan_string_literal ();

/* Character constants */
L?\'			return scan_character_constant ();

/* Integer constants */
0x[0-9a-fA-F]+[lLuU]	return convert_integer (yytext + 2, 16);
0[0-9]*[lLuU]		return convert_integer (yytext, 8);
[1-9][0-9]*[lLuU]	return convert_integer (yytext, 10);

/* Floating constants */
[0-9]+(.[0-9]*)?[Ee][+-]?[0-9]*[fFlL]	return convert_float (yytext);
[0-9]+.[0-9]*[fFlL]			return convert_float (yytext);

%%

/* Return a freshly malloc'ed duplicate of the NUL-terminated string S.
   Allocation failure trips an assertion.  The caller owns the result.  */
char *
copy_string (char *s)
{
  char *duplicate;

  duplicate = malloc (strlen (s) + 1);
  assert (duplicate);

  /* strcpy returns its destination, so hand that straight back.  */
  return strcpy (duplicate, s);
}

/* Build an identifier token: store a private copy of NAME in
   yylval.idval and return the TOK_ID token type.  */
int
generate_identifier_token (char *name)
{
  char *saved_name = copy_string (name);

  yylval.idval = saved_name;
  return TOK_ID;
}

/* Return the value of hexadecimal digit C, or -1 if C is not one.  */
static int
hex_digit_value (int c)
{
  if ((c >= '0') && (c <= '9'))
    return c - '0';
  if ((c >= 'a') && (c <= 'f'))
    return 10 + c - 'a';
  if ((c >= 'A') && (c <= 'F'))
    return 10 + c - 'A';
  return -1;
}

/* Read the remainder of a backslash escape from the scanner input; the
   backslash itself has already been consumed by the caller.

   Returns the value of the escaped character:
     - \a \b \f \n \r \t \v give the matching control character;
     - \ followed by one to three octal digits gives that octal value;
     - \x followed by one or two hex digits gives that hex value, and
       \x with no hex digit after it gives the letter 'x';
     - any other character is returned unchanged (this covers \\, \',
       \" and \?), as is EOF if the input ends after the backslash.

   A character read as look-ahead that is not part of the escape is
   pushed back with unput, unless it is EOF.  */
int
scan_backslash_escape (void)
{
  int c;
  int digit, total;

  c = input ();

  switch (c)
    {
    case 'a':
      return '\a';
    case 'b':
      return '\b';
    case 'f':
      return '\f';
    case 'n':
      return '\n';
    case 'r':
      return '\r';
    case 't':
      return '\t';
    case 'v':
      return '\v';

    case '0':
    case '1':
    case '2':
    case '3':
    case '4':
    case '5':
    case '6':
    case '7':
      total = c - '0';

      /* Optional second octal digit.  */
      c = input ();
      if ((c >= '0') && (c <= '7'))
	total = total * 8 + c - '0';
      else
	{
	  if (c != EOF)
	    unput (c);
	  return total;
	}

      /* Optional third octal digit.  */
      c = input ();
      if ((c >= '0') && (c <= '7'))
	total = total * 8 + c - '0';
      else if (c != EOF)
	unput (c);

      return total;

    case 'x':
      /* First hex digit; without one the escape is just 'x'.  */
      c = input ();
      digit = hex_digit_value (c);
      if (digit < 0)
	{
	  if (c != EOF)
	    unput (c);
	  return 'x';
	}
      total = digit;

      /* Optional second hex digit.  */
      c = input ();
      digit = hex_digit_value (c);
      if (digit >= 0)
	total = total * 16 + digit;
      else if (c != EOF)
	unput (c);

      return total;

    default:
      return c;
    }
}
	

/* Scan the body of a string literal; the opening quote has already been
   matched by the rule that calls this.

   Characters are read up to the closing double quote, with backslash
   escapes translated by scan_backslash_escape.  On success the
   NUL-terminated, malloc'ed text is stored in yylval.stringval (the
   consumer owns it) and TOK_STRING is returned.  An unescaped newline is
   reported through print_error but still stored.  If the input ends
   before the closing quote, an error is reported, the partial buffer is
   released and 0 is returned.

   NOTE(review): newlines consumed here do not pass through the scanner's
   \n rule, so current_line is not advanced for them.  */
int
scan_string_literal (void)
{
  char *buffer;
  size_t capacity = 50;
  size_t length = 0;
  int c;

  buffer = malloc (capacity);
  assert (buffer);

  while ((c = input ()) != '\"')
    {
      if (c == '\n')
	/* print the error, but scan it normally */
	print_error ("Illegal newline in string literal");

      if (c == '\\')
	c = scan_backslash_escape ();

      if (c == EOF)
	{
	  print_error ("End of file in string literal");
	  free (buffer);
	  return 0;
	}

      /* Grow before storing, always keeping one byte in reserve for the
	 terminating NUL.  */
      if (length + 1 >= capacity)
	{
	  char *grown = realloc (buffer, capacity * 2);
	  assert (grown);
	  buffer = grown;
	  capacity *= 2;
	}

      buffer[length++] = c;
    }

  buffer[length] = '\0';

  yylval.stringval = buffer;
  return TOK_STRING;
}

int
scan_character_constant ()
{
  int c;
  struct constant result;

  c = input ();
  
  if (c == '\n')
    /* print the error, but scan it normally */
    print_error ("Illegal newline in character constant");

  if (c == '\'')
    {
      unput (c);
      c = 0;
    }
  
  if (c == '\\')
    c = scan_backslash_escape ();
  
  if (c == EOF)
    {
      print_error ("End of file in character constant");
      return 0;
    }

  result = malloc (sizeof (struct constant));
  assert (result);
  
  result.bits = 8;
  result.value.intval = c;
  
  /* Eat up the rest of the constant */
  while ((c = input ()) != '\'')
    {
      if (c == '\n')
	print_error ("Illegal newline in character constant");

      if (c == EOF)
	{
	  print_error ("End of file in character constant");
	  return 0;
	}

      if (c == '\\')
	c = scan_backslash_escape ();
    }
  
  yylval.constval = result;
  return TOK_CINTEGER;
}


