#include <stdio.h>
#include <string.h>
#include <ctype.h>
#include <stdlib.h>
/* OSF seems to need this */
/*#include <unistd.h>*/

/* maximum number of tags per line */
#define MAXTAGS 25
/* minimum column for right margin */
#define MINRM 60
#define MINMID 20
/*#define MAXINDENT 15*/
#define SENT 10
#define HEADFOOTMATCH 20
#define HEADFOOTSKIP 20

/*
  change backspace/underline text into Tk text tags
  -- used to do this in Tcl, but it was too slow

  I hope the compiler has good common subexpression elimination
     for all the pointer arithmetic.


  Copyright (c) 1993  T.A. Phelps (phelps@cs.Berkeley.EDU)
     written March 24, 1993

    2-Apr  bullets, change bars, copyright symbol
    5      boldface, other SGI nicks
    7      skip unrecognized escape codes
   10      small caps
   13      underscores considered uppercase so show up
              in default small caps font
           screen out Ultrix junk (code getting pretty tangled now)
   14      until Tk text has better tab support, replace tabs by
           spaces until get to next tab stop (for Ultrix)
           -t gives tabstop spacing
   20      Solaris support (Larry Tsui)
    3-Jun  section subheading parsing (Per-Erik Martin)
   28      hyphenated man pages in SEE ALSO show up correctly in Links
           (Mike Steele)
   13-Jul  under FILES, fully qualified path names are added to Links,
              but this taken out immediately because not useful
   14      option to keep changebars on right (Warren Jessop)
    5-Aug  search for header, footer dynamically--
              no need to edit or search large list of patterns
   11      -m kicks in man page formatting beyond nroff backspace kludges
   27      handle double digit numbers better by trying again relative to end of line
   19-Sep  -T gives Tk extras (otherwise ASCII only)
           -H gives headers only (implies -T off)
   10-Oct  -r reverse compiles to [tn]roff source (as Geoff Collyer's nam and fontch,
           but leveraging existing analysis so only addition of ~60 lines)
           (The code is device-driver obscure now--obfuscated C contest next.)
   13      header and footer optionally available at bottom in Tk view
           (Marty Leisner)
   19      "reflected" odd and even page headers&footers zapped
   20      keep count of sections and subsections, using smaller font for larger numbers
*/


/* TO DO ****
   reverse compile to html, sgml also
*/



/*** tag management ***/

enum tagtype { TITLE, ITALICS, BOLD, SYMBOL, SMALLCAPS, BOLDITALICS };
char *tagstrings[] = {
	"title", "high", "bold", "symbol", "sc", "bi"
};
char *menufonts[] = { "$manx(gui,font)", "$man(menu,smallfont)" };


/* Queue of tags collected while one line is processed.  addtag() appends
   entries; filter() writes them out with manTag() after the line and then
   resets tagc to 0. */
struct {
	int type;	/* tag kind (an enum tagtype value) */
	int line;	/* line number the tag applies to */
	int first;	/* starting character position within the line */
	int last;	/* ending character position within the line */
} tags[MAXTAGS];

/* number of entries of tags[] currently in use */
int tagc=0;


/*** globals ***/

/* Option flags and parameters; main() sets them from the command line
   and filter() reads them. */
int fTabstops=8;	/* tab stop spacing in columns (-t) */
int fSubsections=0;	/* extract subsection titles too? (-b) */
int fChangeleft=0;	/* move change bars to left (-c) */
int fMan=0;		/* invoke aggressive man page filtering? (-m, implied by -r and -H) */
int fTk=0;		/* add Tk wrappers for TkMan? (-T) */
int fRoff=0;		/* make source for [tn]roff (-r) */
int fHeaders=0;	/* show section and subsection titles only? (-H) */
int fHeadfoot=0;	/* show canonical header and footer at bottom? (-k) */
char *manName="TkMan";	/* page name written in the generated .TH line (-n) */
int manSect=1;		/* section number written in the generated .TH line (-s) */



/*** utility functions ***/

/*
 * Append one entry (tag type, line number, first and last character
 * positions) to the tags[] queue.  Once the queue holds MAXTAGS entries
 * any further request is silently dropped.
 */
void
addtag(int type, int line, int first, int last)
{
	int slot;

	if (tagc>=MAXTAGS) return;	/* queue full: ignore */

	slot = tagc++;
	tags[slot].type = type;
	tags[slot].line = line;
	tags[slot].first = first;
	tags[slot].last = last;
}
/*
 * Copy s to b with overstrike sequences removed.
 *
 * Every backspace in s is dropped, and for each backspace seen one later
 * non-backspace character is dropped as well, so "N\bNA\bA" becomes "NA".
 * The result is NUL-terminated and never longer than s, so b needs room
 * for strlen(s)+1 bytes.
 */
void
simplefilter(char *s, char *b)
{
	int pending = 0;	/* overstruck characters still to be discarded */
	char c;

	while ((c = *s++) != '\0') {
		if (c == '\b') {
			pending++;
		} else if (pending != 0) {
			pending--;
		} else {
			*b++ = c;
		}
	}
	*b = '\0';
}

/* Write the command "$w.show insert end <s>" on its own line; s is emitted
   verbatim, with no quoting added. */
void
manInsert(char *s)
{
	printf("$w.show insert end %s\n", s);
}
/* Like manInsert(), but s is wrapped in braces: "$w.show insert end {<s>}". */
void
manInsertProtect(char *s)
{
	printf("$w.show insert end {%s}\n", s);
}
/* Insert s brace-protected, then insert the two-character sequence \n
   (backslash, n) as a separate unprotected insert command. */
void
manInsertN(char *s)
{
	manInsertProtect(s);
	manInsert("\\n");
}
/*
 * Write a "$w.show tag add" command applying the tag named tagstrings[tag]
 * to the range line1.char1 through linen.charn.
 * tag is used directly as an array index and is not range-checked.
 */
void
manTag(enum tagtype tag, int line1, int char1, int linen, int charn)
{
	char *tagname = tagstrings[tag];

	printf("$w.show tag add %s %d.%d %d.%d\n",
		  tagname, line1, char1, linen, charn);
}
/*
 * Insert s, with overstrike sequences removed by simplefilter(), as line
 * number `line` and tag that line SMALLCAPS from character 0 to 100.
 * NOTE(review): 100 looks like an arbitrary "past end of line" column --
 * confirm before relying on it for longer lines.
 */
void
manStrip(char *s, int line)
{
	char cleaned[BUFSIZ];

	simplefilter(s, cleaned);
	manInsertN(cleaned);
	manTag(SMALLCAPS, line, 0, line, 100);
}



/*** Kong ***/

/*
 * Read one line from stdin into dst, whose capacity is cap bytes including
 * the terminating NUL, and remove the trailing newline if there is one.
 * A line longer than cap-1 characters is returned in pieces by successive
 * calls.  Returns dst, or NULL at end of input or on a read error.
 */
static char *
fetchline(char *dst, int cap)
{
	size_t len;

	if (fgets(dst, cap, stdin)==NULL) return NULL;
	len = strlen(dst);
	if (len>0 && dst[len-1]=='\n') dst[len-1]='\0';
	return dst;
}

/*
 * Filter stdin to stdout, one line at a time.
 *
 * Overstrike sequences (character, backspace, character) are recognized as
 * italics, bold, bold italics, bullets, plus/minus and dagger.  What is
 * written depends on the option globals:
 *   fTk      each line is wrapped in `$w.show insert end "..."`, followed by
 *            tag commands and, for section titles, sections-menu commands;
 *   fRoff    [tn]roff source is written (.TH, .SH, .SS, .IP, .LP, .br and
 *            \f font escapes);
 *   neither  plain text with the overstrikes removed.
 * With fMan the global indentation is measured from the input, the first
 * text line and the line after it are remembered as the page header, the
 * first footer found likewise, and later lines matching them are dropped.
 * With fHeaders only section (and, with fSubsections, subsection) title
 * lines are written.  With fTk and fHeadfoot the remembered header and
 * footer lines are appended at the end.
 *
 * Input lines are read with a bounded read of at most BUFSIZ-1 characters,
 * so every copy into the BUFSIZ-sized header/footer buffers fits.
 * NOTE(review): writes to plain[] are not bounds-checked; a long line with
 * many mid-line tabs can expand past BUFSIZ columns -- worth confirming.
 */
void
filter(void)
{
	char buf[BUFSIZ+SENT];
	char plain[BUFSIZ];
	char *p;
	char header[BUFSIZ]="";		/* complete line */
	char footer[BUFSIZ]="";
	char head[BUFSIZ]="";		/* first "word" */
	char foot[BUFSIZ]="";
	char header2[BUFSIZ]="";		/* SGIs have two lines of headers and footers */
	char footer2[BUFSIZ]="";
	int header_m=0, footer_m=0;
	int headlen=0, footlen=0;
	int linelen;
	int fFoot=0;
	int line=1;
	int i,j,ci,rci,hl,maybesc;
	int sect,subsect,osubsect=0;
	int title=1;
	char *headfoot = "Header and Footer";
	char *bads = "\\\"[]$";
	char *menubads = "\\\"[]$|{}";
	int sectcnt=0;
	int seealso=0;
	int ncnt=0,scnt=0,oscnt=-1;
	int tt=-1;
	int indent=0;
	int empty=0,oempty;
	int rscnt=0,fIP=0,fHyph=0;

	/* initialize sentinels: the overstrike tests below look a few
	   characters past the current one, so keep NULs after the buffer */
	for (i=0;i<SENT;i++) buf[BUFSIZ+i]='\0';


	if (fRoff) {
		printf(".TH %s %d \"generated by TkMan\" UCB\n",manName,manSect);
		printf(".\\\"  man page source generated by bs2tk,\n");
		printf(".\\\"  part of TkMan by Tom Phelps\n");
	}

	/* -1 means "not measured yet"; without fMan no indentation is removed */
	if (fMan) indent=-1;

	/* run through each line */
	while (fetchline(buf,BUFSIZ)!=NULL) {
		/* i = absolute index into line, ci = current character # for Tk tags */
		i=0; ci=0;

		while (buf[i]=='\x1b'&&buf[i+1]) i+=2;	/* grrr, xman */

		/*** determine header and global indentation ***/
		/* could have used expand (easier but slower) */
		if (indent==-1) {
			for (; buf[i] /*&& ci<MAXINDENT*/; i++,ci++) {
				if (buf[i]=='\t') ci+=fTabstops-1;
				else if (!isspace(buf[i])) {
					if (*header=='\0') {
						/* grab header and its first word */
						strcpy(header,&buf[i]);
						linelen=strlen(&buf[i]);
						if ((header_m=linelen-HEADFOOTSKIP)<0) header_m=0;
						/* the "word" runs until two consecutive spaces
						   or the end of the line, whichever is first */
						for (j=i, p=&buf[i];
							*p && (!isspace(*p) || (p[1] && !isspace(p[1]) /*&& !isdigit(p[1])*/));
							j++,p++)
							 /* empty */;
						headlen = j-i;
						strncpy(head,&buf[i],headlen); head[headlen]='\0';
						/* second header line; empty if input ends here */
						if (fetchline(buf,BUFSIZ)==NULL) buf[0]='\0';
						for (p=buf; *p && isspace(*p); p++) /*empty */;
						strcpy(header2,p);
						break;
					} else {
						/* check '<' for Plan 9(?) */
						if (buf[i]!='<') indent=ci;
						break;
					}
				}
			}
			if (indent==-1) continue;
		}


		/* for each ordinary line... */

		/* skip over global indentation, mindful of EOL and tabs */
		scnt=0;
		p=&buf[i];
		for (; *p && ci<indent; ci++,i++,p++)
			if (*p=='\t') ci+=(fTabstops-1);
		oempty=empty; empty=(*p=='\0');
		if (empty) {ncnt++; continue;}
		if (ci>indent) scnt=ci-indent;


		/* skip over (additional) initial spaces */
		for (; *p && isspace(*p); p++) {
			if (*p=='\t') scnt+=(fTabstops-1);
			scnt++;
		}
		linelen=strlen(p);


		/*** strip out per-page titles ***/

		if (fMan && (scnt==0 || scnt>MINMID)) {
/*printf("***ncnt = %d, fFoot = %d, line = %d***", ncnt,fFoot,line);*/
			if (!fFoot && ncnt>=2 && line+ncnt>50 && line+ncnt<100
			    && !isspace(*p) && (scnt>5 || (*p!='-' && *p!='_'))) {
				/* grab footer and its first word */
				fFoot=1;
				strcpy(footer,&buf[i]);
				if ((footer_m=linelen-HEADFOOTSKIP)<0) footer_m=0;
				/* scan from p, the first non-blank character; scnt is a
				   column count and is not a valid offset when the
				   leading whitespace contains tabs */
				for (footlen=0;
					p[footlen] && (!isspace(p[footlen]) ||
					(p[footlen+1] && !isspace(p[footlen+1]) /*&& !isdigit(p[footlen+1])*/));
					footlen++)
					 /* empty */;
				strncpy(foot,p,footlen); foot[footlen]='\0';
				/* second footer line; empty if input ends here */
				if (fetchline(buf,BUFSIZ)==NULL) buf[0]='\0';
				for (p=buf; *p && isspace(*p); p++) /*empty */;
				strcpy(footer2,p);
				title=1; continue;
			} else
				/* a lot of work, but only for a few lines (about 4%) */
				if (fFoot &&
					 (   (headlen && strncmp(head,p,headlen)==0)
					  || strcmp(header2,p)==0
					  || (footlen && strncmp(foot,p,footlen)==0)
					  || strcmp(footer2,p)==0
					  /* try to recognize lines with dates and page numbers */
					  || (header_m && header_m<linelen &&
						 strcmp(&header[header_m],&p[header_m])==0)
					  || (footer_m && footer_m<linelen &&
						 strcmp(&footer[footer_m],&p[footer_m])==0)
					  /* or with reflected odd and even pages */
					  || (headlen && headlen<linelen &&
						 strncmp(head,&p[linelen-headlen],headlen)==0)
					  || (footlen && footlen<linelen &&
						 strncmp(foot,&p[linelen-footlen],footlen)==0)
					  )) {
				title=1; continue;
			}

			/* page numbers at end of line */
			for(i=0; p[i] && isdigit(p[i]); i++)
				/* empty */;
			if (!p[i]) {title=1; fFoot=1; continue;}
		}


		/*** interline spacing ***/
		sect = (fMan && scnt==0 && (isupper(*p)||(linelen>3&&*p=='_'&&p[1]=='\x08'&&isupper(p[2]))));
		if (fTk) printf("$w.show insert end \"");

		if (title) ncnt=(scnt!=oscnt || isupper(p[0]));
		if (line>1) {	/* gobble all newlines before first text line */
			for (i=0; i<ncnt; i++) {
				if (fTk) {putchar('\\'); putchar('n');}
				else if (fRoff) {printf(".LP\n"); break;}
				else if (!fHeaders) putchar('\n');
			}
			line+=ncnt;
		}
		ncnt=0; title=0;


		/*** process line, looking for special ones ***/
		ci=scnt;

		/*** change bars ***/
		i = linelen-1;
		/*i = strlen(p)-1;*/		/* gets should return this */
		if (!fHeaders && fChangeleft && i+scnt>MINRM) {
			for (; i>=0 && p[i]=='|'; i--) {
				putchar('|');
				/* change bar replaces leading space, if available */
				if (scnt) scnt--; else ci++;
			}
		}
		/* strip trailing spaces */
		for (; i>=0 && isspace(p[i]); i--)
			/* empty */;
		p[i+1]='\0';

		if (i>=0 && !fHeaders && !fRoff) printf("%*s",scnt,"");
		oscnt=scnt;
		subsect=(fSubsections && (scnt==2||(scnt==3&&*p&&p[1]=='\x08')));

		if (fHeaders && !sect && !subsect) {line++; continue;}
		else if (fRoff) {
			if (sect) printf(".SH ");
			else if (subsect) printf(".SS ");
		}

		/*** iterate over each character in line, ***/
		/*** handling underlining, tabbing, copyrights ***/
		hl=maybesc=-1;
		rscnt=0; rci=0; fIP=0;
		for (i=0; p[i]; i++) {

			/* special characters for fRoff mode only */
			if (fRoff) {
				rci++;
				switch(p[i]) {
				   case '"':	/* smart quotes */
					if (i==0 || (i>0 && isspace(p[i-1]))) printf("\\*(lq"); else printf("\\*(rq");
					continue;
					break;
				   case '-':	/* dash before command-line options */
					if (i==0 && oempty) { printf(".IP "); fIP=1; }
					if (p[i+1]=='\0') {fHyph=1; continue;}
					if (rci==1 || (rci>1 && isspace(p[i-1]))) putchar('\\');
					break;
				   case ' ':	/* squeeze out multiple spaces */
				   case '\t':
					p[i]=' ';
					if (fHyph) {putchar('\n'); rci=1; fHyph=0;}
					if (rci==1 || rscnt++>0) {plain[ci++-scnt]=' '; continue;}
					break;
				   case '.':
					if (rci==1) printf("\\&");
					break;
				}
				/* reset roff-specific counters */
				if (p[i]!=' ' && p[i]!='\t') {
					if (fIP && rscnt) { fIP=0; putchar('\n'); rci=0;}
					rscnt=0;
				}
			}

			/* case statement here in place of if chain? */
/* Tk 3.x's text widget tabs too crazy
			if (p[i]==' ' && strncmp("     ",&p[i],5)==0) {
				putchar('\t'); i+=5-1; ci++; continue;
			} else
*/
/* copyright symbol: too much work for so little
			if (p[i]=='o' && (strncmp("opyright (C) 19",&p[i],15)==0
					    || strncmp("opyright (c) 19",&p[i],15)==0)) {
				printf("opyright \xd3 19");
				addtag(SYMBOL, line, ci+9, ci+10);
				i+=15-1; ci+=13; continue;
			} else
*/
			if (p[i]=='\t') {
				/* Ultrix puts tabs within sentences--gag! */
				/* a zero tab stop would be a division by zero; treat
				   the tab as a single space in that case */
				while (fTabstops!=0 && (ci+indent+1)%fTabstops)
					putchar(plain[ci++-scnt]=' ');
				p[i]=' ';
			}

			if ((p[i]=='_' && p[i+1]=='\x08' && p[i+2]!='_' && p[i+3]!='\x08')
				|| (p[i]=='\x08' && p[i+1]=='_')) {
				/* italics */
				/* start tag only if not already in one */
				if (hl==-1) { hl=ci; if (fRoff) printf("\\fI"); else tt=ITALICS; }
				i+=2;
			} else if (p[i]=='_' && p[i+2]==p[i+4] && p[i+1]=='\x08' && p[i+3]=='\x08' && p[i+2]!='_') {
				/* bold italics (Solaris is BRAIN DEAD!) */
				if (hl==-1) { hl=ci; if (fRoff) printf("\\f4"); else tt=BOLDITALICS; }
				for (i+=2; p[i]==p[i+2] && p[i+1]=='\x08';)
					i+=2;
			} else if (p[i]==p[i+2] && p[i+1]=='\x08') {
				/* boldface */
				if (hl==-1) { hl=ci; if (fRoff) printf("\\fB"); else tt=BOLD; }
				while (p[i]==p[i+2] && p[i+1]=='\x08')
					i+=2;
			} else if (p[i+1]=='\x08' &&
					 ((p[i]=='o' && p[i+2]=='+') ||
					 (p[i]=='+' && p[i+2]=='o')) ) {
				/* bullets */
				i+=2;
				while (p[i+1]=='\x08' &&		/* bold bullets(!) */
					 (p[i]=='o' || p[i+2]=='+') )
					i+=2;
				if (fRoff) {
					if (i==0+2 && oempty) {printf(".IP "); fIP=1;}
					printf("\\(bu");
				} else { putchar('\xb7'); addtag(SYMBOL, line, ci, ci+1); }
				ci++; continue;
			} else if (p[i+1]=='\x08' && p[i]=='+' && p[i+2]=='_') {
				/* plus/minus */
				if (fRoff) printf("\\(+-"); else putchar('\xb1');
				i+=2; ci++; continue;
			} else if (p[i+1]=='\x08' && p[i]=='|' && p[i+2]=='-') {
				/* dagger */
				if (fRoff) printf("\\(dg"); else putchar('\xa7');
				i+=2; ci++; continue;
			} else if (p[i]=='\x08') {
				/* suppress unattended backspaces */
				continue;
			} else if (p[i]=='\x1b' /*&& (p[i+1]=='9'||p[i+1]=='8')*/) {
				/* skip unrecognized escape codes */
				i++; continue;
			} else if (!sect && (isupper(p[i]) /*|| p[i]=='_'*/ || p[i]=='&')) {
				/* possible start of a run of capitals (small caps) */
				if (hl==-1 && maybesc==-1) {maybesc=ci;}
			} else {
				/* end of tag, one way or another */
				if (hl>=0) {
					if (fRoff) printf("\\fR"); else addtag(tt, line, hl, ci);
				} else if (!fRoff && maybesc>=0 && ci-maybesc>=2) {
					addtag(SMALLCAPS, line, maybesc, ci);
				}
				maybesc=hl=-1;
			}
			if (!p[i]) break;	/* safety check: i may have been advanced onto the terminator */
			/* escape some chars for Tcl */
			if (fTk && strchr(bads,p[i])!=NULL) {putchar('\\');}
/*			switch (p[i]) {
			   case '\\': case '"': case '[': case ']': case '$':
				putchar('\\');
			}
*/
			putchar(p[i]);
			/* plain[] keeps an unformatted copy for menu labels and links */
			plain[ci-scnt]= (strchr(menubads,p[i])==NULL)?p[i]:' ';
			ci++;
		}
		if (hl>=0) {
			if (fTk) addtag(tt, line, hl, ci);
			if (fRoff) printf("\\fR");
		} else if (maybesc>=0 && ci-maybesc>=2) addtag(SMALLCAPS, line, maybesc, ci);
		if (fTk) printf("\\n\"\n"); else if (!fRoff) putchar('\n');
		else if (fRoff) {
			if (!fHyph || scnt+ci<MINRM) putchar('\n');
			if (!fHyph && scnt+ci<MINRM) printf(".br\n");
		}
		plain[i=ci-scnt]='\0';


		/*** deal with section titles, hyperlinks ***/

		if (sect || subsect) {
			for (p=plain; *p; p++) *p=tolower(*p);
			for (/*i=ci-scnt-1*/; i>=0 && isspace(plain[i]); i--) /* empty */;
			plain[i+1]='\0';
			if (!fTk) {
			} else if (!subsect) {
				seealso = strcmp(plain,"see also")==0;
				tagc=0; addtag(TITLE, line, 0, ci);	/* embolden sections */
				printf("$w.sections.m add command -label {%s} -command \"$w.show yview %d\"\n",
					  plain, line-1 /* pickplace has zero-based offset */);
				sectcnt++;
			} else if (!osubsect && sectcnt<50) {	/* damage control */
				/*tagc=0; addtag(ITALICS, line, 0, ci);*/	/* italicize subsections */
				printf("$w.sections.m add command -label {   %s} -command \"$w.show yview %d\"\n",
					  plain, line-1 /* pickplace has zero-based offset */);
				sectcnt++;
			}
		} else if (seealso && fTk) {
			/* i can be 0 for a whitespace-only line; do not look at plain[-1] */
			if (i>0 && plain[i-1]=='-') {plain[i-1]='\0'; printf("append manx(links) {%s}\n", plain);}
			else printf("append manx(links) {%s,}\n", plain);
		}
		osubsect=subsect;

		line++;
		if (fTk && line==300) printf("update idletasks\n");

		/* write tags right after line */
		if (fTk) for (i=0; i<tagc; i++) {
			manTag(tags[i].type, tags[i].line, tags[i].first, tags[i].line, tags[i].last);
		}
		tagc=0;
	}

	/* wrap up at end */
	if (fTk) {
		if (!sectcnt) printf("$w.sections.m add command -label (none)\n");

		if (fHeadfoot) {
			manInsert("\\n\\n"); manInsertN(headfoot); line+=2;
			manTag(TITLE, line, 0, line, strlen(headfoot));
			printf("$w.sections.m add separator\n");
			strcpy(buf,headfoot); for(i=0; buf[i]; i++) {buf[i]=tolower(buf[i]);}
			printf("$w.sections.m add command -label {%s} -command \"$w.show yview %d\"\n",
				  buf, line-1 /* pickplace has zero-based offset */);
			line++;

			if (*header) manStrip(header,line++);
			if (*header2) manStrip(header2,line++);
			if (*footer) manStrip(footer,line++);
			if (*footer2) manStrip(footer2,line++);
		}

		/* set font for menu according to number of entries */
		printf("$w.sections.m configure -font %s\n", menufonts[(sectcnt>40)]);
		printf("$w.search.cnt configure -text {%d lines}\n", line-1);
	}
}



/*
 * Parse the command line into the option globals, optionally reopen stdin
 * on the file named by the first non-option argument, and run filter().
 *
 * Options: -k header/footer at bottom, -b subsections, -c change bars left,
 * -T Tk output, -n name, -t tab stop spacing, -m man page filtering,
 * -r roff output, -s section number, -H titles only, -v print version.
 * -T, -r and -H reset each other's flags, so the last one given wins.
 *
 * Exit status: 0 on success (and after -v), 1 if the input file cannot be
 * opened, 2 for an unknown option or an invalid -t value.
 */
int
main(int argc, char **argv)
{
	int c;
	long tabw;
	char *end;
	extern char *optarg;
	extern int optind, optopt;
	/* declared here because this file does not include <unistd.h> */
	extern int getopt(int, char * const [], const char *);

	while ((c=getopt(argc,argv,"bckmrvHTn:t:s:"))!=-1)
		switch (c) {
		   case 'k': fHeadfoot=1; break;
		   case 'b': fSubsections=1; break;
		   case 'c': fChangeleft=1; break;
		   case 'T': fTk=1; fRoff=0; fHeaders=0; break;
		   case 'n': manName=optarg; break;
		   case 't':
			/* filter() computes columns modulo the tab stop, so a zero,
			   negative or non-numeric value must be rejected here */
			tabw=strtol(optarg,&end,10);
			if (end==optarg || *end!='\0' || tabw<1 || tabw>BUFSIZ) {
				fprintf(stderr, "%s: invalid tab stop spacing %s\n",argv[0],optarg);
				exit(2);
			}
			fTabstops=(int)tabw;
			break;
		   case 'm': fMan=1; break;
		   case 'r': fRoff=1; fTk=0; fMan=1; fHeaders=0; break;
		   case 's': manSect=atoi(optarg); break;
		   case 'H': fHeaders=1; fTk=0; fMan=1; break;
		   case 'v':
			printf("bs2tk $Revision: 1.37 $\n");
			exit(0);
		   default:
			/* getopt returns '?' here; the offending letter is in optopt */
			fprintf(stderr, "%s: unidentified option -%c\n",argv[0],optopt);
			exit(2);
		}

	/* read from given file name */
	if (optind<argc) {
		if (freopen(argv[optind], "r", stdin)==NULL) {
			fprintf(stderr, "%s: can't open %s\n", argv[0],argv[optind]);
			exit(1);
		}
	}

	filter();
	return(0);
}
