/*****
* miniparse.h : required header file when compiling the parser standalone.
*
* This file Version	$Revision: 1.3 $
*
* Creation date:		Wed Mar 19 17:26:15 GMT+0100 1997
* Last modification: 	$Date: 1998/02/12 03:09:33 $
* By:					$Author: unammx $
* Current State:		$State: Exp $
*
* Author:				newt
*
* Copyright (C) 1994-1997 by Ripley Software Development 
* All Rights Reserved
*
* This file is part of the XmHTML Widget Library.
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Library General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
* Library General Public License for more details.
*
* You should have received a copy of the GNU Library General Public
* License along with this library; if not, write to the Free
* Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*
*****/
/*****
* $Source: /debian/home/gnomecvs/gnome-libs/gtk-xmhtml/miniparse.h,v $
*****/
/*****
* ChangeLog 
* $Log: miniparse.h,v $
* Revision 1.3  1998/02/12 03:09:33  unammx
* Merge to Koen's XmHTML 1.1.2 + following fixes:
*
* Wed Feb 11 20:27:19 1998  Miguel de Icaza  <miguel@nuclecu.unam.mx>
*
* 	* gtk-forms.c (freeForm): gtk_destroy_widget is no longer needed
* 	with the refcounting changes.
*
* 	* gtk-xmhtml.c (gtk_xmhtml_remove): Only god knows why I was
* 	adding the just removed widget.
*
* Revision 1.2  1997/12/29 22:16:32  unammx
* This version does:
*
*    - Sync with Koen to version Beta 1.1.2c of the XmHTML widget.
*      Includes various table fixes.
*
*    - Callbacks are now properly checked for the Gtk edition (ie,
*      signals).
*
* Revision 1.1  1997/12/25 01:34:13  unammx
* Good news for the day:
*
*    I have upgraded our XmHTML sources to XmHTML 1.1.1.
*
*    This basically means that we got table support :-)
*
* Still left to do:
*
*    - Set/Get gtk interface for all of the toys in the widget.
*    - Frame support is broken, dunno why.
*    - Form support (ie adding widgets to it)
*
* Miguel.
*
* Revision 1.6  1997/10/23 00:30:39  newt
* XmHTML Beta 1.1.0 release
*
* Revision 1.5  1997/08/30 02:04:25  newt
* _XmHTMLWarning proto changes.
*
* Revision 1.3  1997/05/28 01:56:39  newt
* Added my_strdup.
*
* Revision 1.2  1997/04/29 14:31:41  newt
* Removed unused structures.
*
* Revision 1.1  1997/03/20 08:01:55  newt
* Initial Revision
*
*****/ 
/* Include guard; the matching #endif is the last directive in this file. */
#ifndef _miniparse_h_
#define _miniparse_h_

/* Flag a standalone parser build, unless the build already supplied a value. */
#if !defined(MINIPARSE)
#define MINIPARSE 1
#endif

#include <sys/time.h>
#include <unistd.h>

#ifdef __STDC__
#include <stdarg.h>
#else
#include <varargs.h>
#endif

#include <errno.h>	/* perror */


/*****
* Type names the parser sources require when compiled standalone.
*****/
typedef char *String;				/* character string */
typedef unsigned char Byte;
typedef unsigned char Boolean;		/* holds True or False */
typedef unsigned short Dimension;
typedef unsigned char *Widget;
typedef Widget XmHTMLWidget;		/* same type as Widget */

/*****
* Parser switches
*****/
/* False: no warnings are issued at all */
extern Boolean parser_warnings;
/* False: the parser becomes a bit more lenient */
extern Boolean parser_strict_checking;
/* True: debug output is shown */
extern Boolean parser_debug;
/* True: timings of the parser tree verification are reported */
extern Boolean parser_verification_timings;

/*****
* Parser counters
*****/
/* running count of encountered errors */
extern int parser_errors;
/* number of HTML segments in the input text */
extern int parsed_object_count;
/* number of text segments in the input text */
extern int parsed_text_object_count;

/* DEBUG is always switched on when this header is used */
#if !defined(DEBUG)
#define DEBUG	1
#endif

/*
* Boolean constants. Each macro carries its own guard: a previously included
* header that supplied only one of the pair must neither leave the other one
* undefined nor have its own definition redefined here.
*/
#ifndef True
#  define True 1
#endif
#ifndef False
#  define False 0
#endif

/*****
* Internal id's of the HTML elements.
* The list is kept alphabetically sorted to speed up the searching process,
* so neither the order nor the members may change.
* DO NOT MODIFY
*****/
typedef enum{
	HT_DOCTYPE,
	HT_A, HT_ADDRESS, HT_APPLET, HT_AREA,
	HT_B, HT_BASE, HT_BASEFONT, HT_BIG, HT_BLOCKQUOTE, HT_BODY, HT_BR,
	HT_CAPTION, HT_CENTER, HT_CITE, HT_CODE,
	HT_DD, HT_DFN, HT_DIR, HT_DIV, HT_DL, HT_DT,
	HT_EM,
	HT_FONT, HT_FORM, HT_FRAME, HT_FRAMESET,
	HT_H1, HT_H2, HT_H3, HT_H4, HT_H5, HT_H6, HT_HEAD, HT_HR, HT_HTML,
	HT_I, HT_IMG, HT_INPUT, HT_ISINDEX,
	HT_KBD,
	HT_LI, HT_LINK,
	HT_MAP, HT_MENU, HT_META,
	HT_NOFRAMES,
	HT_OL, HT_OPTION,
	HT_P, HT_PARAM, HT_PRE,
	HT_SAMP, HT_SCRIPT, HT_SELECT, HT_SMALL, HT_STRIKE, HT_STRONG, HT_STYLE,
	HT_SUB, HT_SUP,
	HT_TAB, HT_TABLE, HT_TD, HT_TEXTAREA, HT_TH, HT_TITLE, HT_TR, HT_TT,
	HT_U, HT_UL,
	HT_VAR,
	HT_ZTEXT
}htmlEnum;

/*****
* Name table corresponding to the htmlEnum ids above.
* The array itself is defined in parse.c.
*****/
extern String html_tokens[];

/*****
* Non-zero for elements whose closing counterpart is optional.
* The argument is evaluated more than once.
*****/
#define OPTIONAL_CLOSURE(id) \
	((id) == HT_DD || \
	 (id) == HT_DT || \
	 (id) == HT_LI || \
	 (id) == HT_P || \
	 (id) == HT_OPTION || \
	 (id) == HT_TD || \
	 (id) == HT_TH || \
	 (id) == HT_TR)

/*****
* Non-zero for physical/logical markup elements.
* The argument is evaluated more than once.
*****/
#define IS_MARKUP(id) \
	((id) == HT_TT || \
	 (id) == HT_I || \
	 (id) == HT_B || \
	 (id) == HT_U || \
	 (id) == HT_STRIKE || \
	 (id) == HT_BIG || \
	 (id) == HT_SMALL || \
	 (id) == HT_SUB || \
	 (id) == HT_SUP || \
	 (id) == HT_EM || \
	 (id) == HT_STRONG || \
	 (id) == HT_DFN || \
	 (id) == HT_CODE || \
	 (id) == HT_SAMP || \
	 (id) == HT_KBD || \
	 (id) == HT_VAR || \
	 (id) == HT_CITE || \
	 (id) == HT_FONT)

/*****
* Non-zero for text containers.
* The argument is evaluated more than once.
*****/
#define IS_CONTAINER(id) \
	((id) == HT_BODY || \
	 (id) == HT_DIV || \
	 (id) == HT_CENTER || \
	 (id) == HT_BLOCKQUOTE || \
	 (id) == HT_FORM || \
	 (id) == HT_TH || \
	 (id) == HT_TD || \
	 (id) == HT_DD || \
	 (id) == HT_LI || \
	 (id) == HT_NOFRAMES)

/*****
* Non-zero for every element that may be nested: all markup elements
* (see IS_MARKUP) plus a handful of block elements.
* The argument is evaluated more than once.
*****/
#define NESTED_ELEMENT(id) \
	(IS_MARKUP(id) || \
	 (id) == HT_APPLET || \
	 (id) == HT_BLOCKQUOTE || \
	 (id) == HT_DIV || \
	 (id) == HT_CENTER || \
	 (id) == HT_FRAMESET)

/*****
* Non-zero for the remaining (miscellaneous) elements.
* The argument is evaluated more than once.
*****/
#define IS_MISC(id) \
	((id) == HT_P || \
	 (id) == HT_H1 || \
	 (id) == HT_H2 || \
	 (id) == HT_H3 || \
	 (id) == HT_H4 || \
	 (id) == HT_H5 || \
	 (id) == HT_H6 || \
	 (id) == HT_PRE || \
	 (id) == HT_ADDRESS || \
	 (id) == HT_APPLET || \
	 (id) == HT_CAPTION || \
	 (id) == HT_A || \
	 (id) == HT_DT)


/*****
* Error codes that can be passed to XmNparserCallback
*****/
typedef enum{
	/* unknown HTML element */
	HTML_UNKNOWN_ELEMENT = 1,
	/* very badly placed element */
	HTML_BAD = 2,
	/* block still open while new block started */
	HTML_OPEN_BLOCK = 3,
	/* block closed but was never opened */
	HTML_CLOSE_BLOCK = 4,
	/* unbalanced terminator */
	HTML_OPEN_ELEMENT = 5,
	/* improperly nested element */
	HTML_NESTED = 6,
	/* bad content for current block/element */
	HTML_VIOLATION = 7,
	/* insertion of optional opening/closing */
	HTML_NOTIFY = 8,
	/* internal parser error */
	HTML_INTERNAL = 9
}parserError;

/*****
* Matching values for XmNenableBadHTMLWarnings.
* The individual values are bit flags that are or'd together.
* XmHTML_NONE disables warnings and XmHTML_ALL enables all warnings.
* See parserError for the meaning of each flag.
*
* NOTE(review): XmHTML_ALL is a bit of its own (128) and not the union of
* the other flags (127); confirm that the code consuming these values tests
* for XmHTML_ALL explicitly.
*****/
enum{
	XmHTML_NONE				= 0,		/* no warnings */
	XmHTML_UNKNOWN_ELEMENT	= 1 << 0,
	XmHTML_BAD				= 1 << 1,
	XmHTML_OPEN_BLOCK		= 1 << 2,
	XmHTML_CLOSE_BLOCK		= 1 << 3,
	XmHTML_OPEN_ELEMENT		= 1 << 4,
	XmHTML_NESTED			= 1 << 5,
	XmHTML_VIOLATION		= 1 << 6,
	XmHTML_ALL				= 1 << 7	/* all warnings */
};

/*****
* A parsed HTML element. Objects are chained together through their
* next and prev members.
*****/
typedef struct _XmHTMLObject{
	/* ID for this element */
	htmlEnum id;

	/*
	* Raw text. For HTML elements, freeing this member also frees
	* attributes.
	*/
	String element;

	/* attributes for this element, if any */
	String attributes;

	/* true if element must be ignored */
	Boolean ignore;

	/* true when this is a closing element */
	Boolean is_end;

	/* true when element has a closing counterpart */
	Boolean terminated;

	/* auto inserted element */
	Boolean auto_insert;

	/* line number for this element */
	int line;

	/* neighbouring objects in the list */
	struct _XmHTMLObject *next, *prev;
}XmHTMLObject;

/*****
* Callback invoked when the parser has finished a single pass on the input.
*
* ARGS:
*   objects : the current list of parser objects, which may NOT be freed;
*   html32  : True if input was HTML3.2 conforming, False if not;
*   verified: True if parser verification succeeded;
*   balanced: True if parser tree was balanced;
*   pass    : current parser pass (count starts at 0);
*   len     : length of input text;
* Return values:
*   True  : make another pass on the input, using the current (possibly
*           repaired) output;
*   False : don't make another pass on the input;
*****/
typedef Boolean (*ParserDocumentCallback)(XmHTMLObject *objects,
	Boolean html32, Boolean verified, Boolean balanced, int pass, int len);

extern ParserDocumentCallback parser_document_callback;

/*****
* Callback invoked upon completion of a single pass.
*
* ARGS:
*   on_stack: number of elements still on stack (only when document is
*             unbalanced);
*   inserted: number of (missing) HTML tags inserted by the parser;
*   ignored : number of HTML tags ignored by the parser;
* Return values:
*   None.
*****/
typedef void (*ParserAutoCorrectCallback)(int on_stack, int inserted,
	int ignored);

extern ParserAutoCorrectCallback parser_autocorrect_callback;

/*****
* One record of the parser state stack
*****/
typedef struct _stateStack{
	/* current state id */
	htmlEnum id;
	/* ptr to next record */
	struct _stateStack *next;
}stateStack;

/*****
* A Parser: input text, generated object list, state stack and flags.
*****/
typedef struct _Parser{
	/*** input text ***/
	String source;					/* text being parsed */
	int index;						/* last known position */
	int len;						/* length of input text */
	int num_lines;					/* current line count */
	Dimension line_len;				/* maximum line length so far */
	Dimension cnt;					/* current line length */

	/*** running list of inserted elements ***/
	int num_elements;				/* no of tags inserted so far */
	int num_text;					/* no of text elements inserted so far */
	XmHTMLObject *head;				/* head of object list */
	XmHTMLObject *current;			/* lastly inserted element */

	/*** state stack ***/
	stateStack state_base;			/* stack base point */
	stateStack *state_stack;		/* actual stack */

	/*** current element: start and end position ***/
	int cstart, cend;

	/*** flags ***/
	Boolean strict_checking;		/* HTML 3.2 looseness flag */
	Boolean have_body;				/* indicates presence of <body> tag */
	Boolean warn;					/* warn about bad html constructs */
	Boolean bad_html;				/* bad HTML document flag */
	Boolean html32;					/* HTML32 conforming document flag */
	Boolean automatic;				/* when in automatic mode */

	/*** for the warning messages ***/
	Widget widget;
}Parser;

/*****
* Various helper functions used by the parser. The parser defines them
* itself when it is compiled with -DMINIPARSE.
*****/

void my_locase(char *string);
char *my_strcasestr(const char *s1, const char *s2);
char *my_strndup(const char *s1, size_t len);
char *my_strdup(const char *s1);

/*****
* The parser uses strcasecmp and strncasecmp. Since these do not exist
* on every system, the parser carries fallback copies, which are used
* when -DNEED_STRCASECMP is given at compile time: both names are then
* redirected to the fallback copies.
*****/
#if defined(NEED_STRCASECMP)

int my_strcasecmp (const char *s1, const char *s2);
int my_strncasecmp (const char *s1, const char *s2, size_t n);

#define strcasecmp(S1,S2) my_strcasecmp(S1,S2)
#define strncasecmp(S1,S2,N) my_strncasecmp(S1,S2,N)

#endif /* NEED_STRCASECMP */

/*****
* Warning message display function
* When parser_warnings has been set to False, no warnings will be
* generated.
*****/
/*
* Expands to the four leading arguments of the warning function: the widget
* id cast to Widget, the current source file, the current line and the name
* of the calling routine.
* WIDGET_ID is parenthesized so that the cast applies to the argument as a
* whole; without the parentheses only the first operand of a compound
* expression would be cast.
*/
#define __WFUNC__(WIDGET_ID, FUNC)	(Widget)(WIDGET_ID), __FILE__, \
	 __LINE__, FUNC

/*
* A full prototype is only given when __STDC__ is defined; otherwise the
* function is declared without parameter information.
*/
#ifdef __STDC__
extern void __XmHTMLWarning(Widget w, String module, int line,
	String routine, String fmt, ...);
#else
extern void __XmHTMLWarning();
#endif

/* _XmHTMLWarning is an alias for __XmHTMLWarning */
#define _XmHTMLWarning __XmHTMLWarning

/*****
* Public Parser Functions
*****/

/*****
* Write the list of objects to the given file. HTML text segments are left
* out of the output file when notext is True.
*****/
void ParserWriteOutputToFile(XmHTMLObject *objects,
	String prefix,
	Boolean notext);

/*****
* Write the list of objects to the given file as a HTML file
*****/
void ParserWriteHTMLOutputToFile(XmHTMLObject *objects,
	String prefix,
	Boolean notext);

/*****
* Compose a HTML output string from the list of objects
*****/
String _XmHTMLTextGetString(XmHTMLObject *objects);

/*****
* Free the given list of objects
*****/
void _XmHTMLFreeObjects(XmHTMLObject *objects);

/*****
* The parser. Takes two widgets, a previous list of objects and the text
* to be parsed as its input.
* Returns a list of parsed objects.
*****/
XmHTMLObject *_XmHTMLparseHTML(XmHTMLWidget html,
	XmHTMLObject *old_list,
	char *input,
	XmHTMLWidget dest);

/* Don't add anything after this endif! */
#endif /* _miniparse_h_ */
