
%{
/*
 * (C) 1988 by Adobe Systems Incorporated. All rights reserved.
 *
 * This file may be freely copied and redistributed as long as:
 *   1) This entire notice continues to be included in the file, 
 *   2) If the file has been modified in any way, a notice of such
 *      modification is conspicuously indicated.
 *
 * PostScript, Display PostScript, and Adobe are registered trademarks of
 * Adobe Systems Incorporated.
 * 
 * ************************************************************************
 * THE INFORMATION BELOW IS FURNISHED AS IS, IS SUBJECT TO CHANGE WITHOUT
 * NOTICE, AND SHOULD NOT BE CONSTRUED AS A COMMITMENT BY ADOBE SYSTEMS
 * INCORPORATED. ADOBE SYSTEMS INCORPORATED ASSUMES NO RESPONSIBILITY OR 
 * LIABILITY FOR ANY ERRORS OR INACCURACIES, MAKES NO WARRANTY OF ANY 
 * KIND (EXPRESS, IMPLIED OR STATUTORY) WITH RESPECT TO THIS INFORMATION, 
 * AND EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES OF MERCHANTABILITY, 
 * FITNESS FOR PARTICULAR PURPOSES AND NONINFRINGEMENT OF THIRD PARTY RIGHTS.
 * ************************************************************************
 */

/*
	This is a "lex" source file for PostScript language syntax.
	The #include statement points to a file of C definitions for
	actions related to the PostScript data types as recognized
	by the scanner:
		strings		integers
		radixnumbers	curlybraces
		names		/literalnames
		comments	hexstrings

	There is one procedure call for each of these types recognized.
	The "prettyprint.c" file will reformat the PostScript file
	according to some stylistic constraints which are primarily
	indentation rules for procedure bodies.  This can be expanded
	for other purposes as well.

	BUGS:
	  * Strings with newlines are not handled correctly.
	  * Radix notation is too hard for things like 19#12defghi3.
	    It's too hard to tell them from executable names with
	    just regular expressions.  This should be second-checked
	    by the C program.
	  * <hex strings> are treated specially by the PostScript
	    scanner and a "syntaxerror" is raised if there is anything
	    but legal hexadecimal characters inside the <> brackets.
	    The regular expression rules in this file cannot deal
	    correctly with that yet.
	  * Scientific notation for numbers has not yet been tackled.
	  * Handling numbers in light of the radix notation is quite
	    sticky; I think it works correctly, but the PostScript
	    scanner is tolerant of a lot of weirdnesses relating to
	    numeric formats, and this should be checked as a possible
	    source of bugs.
*/

/*
    These routines are defined externally.  For the "psformat" program,
    they are defined in "prettyprint.c".

    Every routine called from a rule action or from main() in this file
    is listed exactly once.  All of them except opencurly(), closecurly(),
    newline() and initialize() are called with yytext, the text of the
    token just matched.  The parameter lists are left empty (old-style
    declarations), so the compiler does not check the arguments.  The
    return type is spelled out as int, which is what the former
    implicit-int declarations meant; no return value is used in this file.
 */
extern int	initialize();
extern int	string();
extern int	structurecomment();
extern int	firstcolcomment();
extern int	comment();
extern int	opencurly();
extern int	closecurly();
extern int	integer();
extern int	radix();
extern int	litname();
extern int	specialexname();
extern int	exname();
extern int	newline();

/*
    LEX rules start here.
 */
%}

/* NAMECHARS [!-$&'*-.0-9:;=?@A-Z^-z|~] */
/* Characters accepted inside literal and executable names. */
NAMECHARS [!"#$&'*+,\-.0-9:;=?@A-Z^_`a-z|~]
/* Letters, digits, underscore and hyphen; not referenced by any rule in the rules section. */
MINIMALNAMECHARS [a-zA-Z0-9_\-]
/* Hexadecimal digits plus space, tab and newline. */
HEXCHARS [0-9a-fA-F \t\n]
/* White space: space, newline and tab. */
W [ \n\t]
/* Closing delimiters plus white space; not referenced by any rule in the rules section. */
SELFDELIMITING [)\]} \n\t]
/* Opening delimiters, slash and white space. */
T [([{/ \t\n]

%%

\(([^()\\\n]|\\.|\(([^()\\\n]|\\.|\(([^()\\\n]|\\.)*\))*\))*\)	|
\<[^>\n]*\>		{
	/* STRINGS are the most crucial part; they come first! */
	/*
	   A parenthesized string ends at its own closing parenthesis,
	   not at the last one found on the line, so several strings on
	   one line, and the tokens between them, are scanned separately.
	   A backslash escapes the character after it, and balanced
	   parentheses may nest up to two levels deep inside the string;
	   a regular expression cannot count deeper nesting than is
	   spelled out in the pattern.  A string nested deeper than that
	   is not matched by this rule.
	   An angle-bracket string likewise ends at the first closing
	   angle bracket on the line.
	*/
	/* [still does not deal with strings with newlines in them] */
	string(yytext);
 }

^"%!".*		|
^"%%+".*	|
^"%%"[A-Z]+.*	{
	/* COMMENTS */
	/*
	   A comment that starts in the first column with %!, with %%+,
	   or with %% followed by capital letters goes to
	   structurecomment, together with the rest of its line.
	*/
	structurecomment(yytext);
}

^"%".*		{
	/* any other comment that starts in the first column */
	firstcolcomment(yytext);
}

"%".*		{
	/* a comment that starts anywhere else on the line */
	comment(yytext);
}

{W}*"{"{W}	|
"{"		{
	/* PROCEDURES */
	/*
	   The first pattern also takes the white space in front of the
	   opening brace and one white space character after it, so that
	   text is consumed here and never reaches the newline rule.
	*/
	opencurly();
}

{W}*"}"		|
"}"		{
	/* white space in front of the closing brace is consumed as well */
	closecurly();
}

[+-]?[0-9]+/[()<>\[\]\{\}/% \t\n]	{
	/* INTEGERS */
	/*
	   An optionally signed run of decimal digits.  The trailing
	   context requires the next character to be white space or one
	   of the PostScript delimiters: parentheses, angle brackets,
	   square brackets, curly braces, slash or percent.  Closing
	   delimiters are included so that the last element of an array
	   or procedure body is still scanned as an integer.  Digits
	   followed by anything else, or by end of input, fall through to
	   the executable name rule.
	*/
	integer(yytext);
 }

"1"[0-6]#[0-9a-fA-F]+/[()<>\[\]\{\}/% \t\n]	{
	/* RADIX NUMBERS */
	/* special case for 16#12345abcdef, etc. */
	/*
	   Only hexadecimal digits may follow the number sign.  White
	   space is deliberately not part of the digit run; otherwise the
	   match would run on through the following blanks and any word
	   spelled only with hex digit letters.
	*/
	radix(yytext);
}

[0-9]#[0-9]+/[()<>\[\]\{\}/% \t\n]	{
	/* radix numbers in range 1#whatever to 9#whatever */
	radix(yytext);
 }

"["		|
"]"		{
	/* ARRAYS */
	/*
	   Both square brackets are handed on as executable names, which
	   is good enough for most purposes.
	*/
	exname(yytext);
}

"/"{NAMECHARS}+	{
	/* LITERAL NAMES: a slash followed by one or more name characters */
	litname(yytext);
}

def		{
	/* SPECIAL CASE FOR "def" to add NEWLINE */
	/*
	   A line seldom goes on after a "def".  When it does, the
	   program has usually been compacted to strip unnecessary white
	   space, and undoing that kind of thing is what the "psformat"
	   program is for.
	*/
	specialexname(yytext);
}

{NAMECHARS}+	{
	/* EXECUTABLE NAMES: any other run of name characters */
	exname(yytext);
}

\n		{
	/* NEWLINE: reported to the formatter */
	newline();
}

[\t ]		{
	/* a single blank or tab between tokens is discarded */
}

%%

/*
    initialize() is not defined in this file; it is declared here so the
    call below does not rely on an implicit declaration.
 */
extern int	initialize();

/*
    Program entry point: calls initialize(), then runs the scanner
    generated from the rules above by calling yylex() once, and reports
    success.  Returning 0 from main has the same effect as exit(0) and
    needs no declaration of exit().
 */
int main(void) {
	initialize();
	yylex();
	return 0;
}
