#include <stdio.h>
#define FALSE	0
#define TRUE	1
#define FOUND		0       /* search_file return codes */
#define NOT_FOUND       1
#define REGERROR	2
#define NL	'\n'
#define MAXLINE 256
#define DEBUG(l, f, s)	if (Debug >= l) fprintf(stderr, f, s)

extern char buf[];
extern char *gap_start, *gap_end, *buffer_end;
extern int  Debug;

static int dot_count, l, last_star, last_string, regl = -1, st, type;
static char reg[132];
static struct {
    char search_char;
    int len;
    int start;
    int search_type;
} reg_info [20];
/***
 *	search_file (tp, ti, tl, fi, fe, ami, ame, auparrow_flag, adollar_flag, acode);
 *	int tp, ti, tl, fi, fe, *ami, *ame, *auparrow_flag, *adollar_flag, *acode;
 *
 *	This routine searches within the buffer for the given expression in
 *	the given range and returns the beginning and ending places.
 *
 *	Codes:
 *
 *       FOUND - expression found.
 *	 NOT_FOUND - expression not in range.
 *	 REGERROR - error in regular expression.
 *
 */
search_file (tp,ti,tl,fi,fe,ami,ame,auparrow_flag,adollar_flag,acode)
int ti,tl,fi,fe,*ami,*ame,*auparrow_flag,*adollar_flag,*acode;
char *tp;
{
    static int omit_newline, uparrow_flag;
    char ch;
    int i, j, te;
    int match_start[20],reentry_point[20];

    if (tl == 0) {
	if (regl >= 0) goto match;
	printf ("// undefined in regular expression.\n");
fatal:
	regl = -1;
	*acode = REGERROR;
	return;
    }

    te = ti + tl - 1;
    st = 0;
    l = 0;
    type = 0;

    if (tp[ti] == '^') {
	type = 1;
	ti++;
	reg[0] = NL;
	st++;
	uparrow_flag = TRUE;
    }
    else uparrow_flag = FALSE;

    regl = -1;
    last_string = -1;
    last_star = -1;
    omit_newline = FALSE;

parse_expression:
    while ((tl = te-ti+1) > 0) {
	ch = tp[ti];
	DEBUG(5,"Parse loop  CH = %c\n", ch);
	switch (ch) {
	case '.':
	    if (!end_subexpression()) goto fatal;
	    i = 0;
	    tl = te - ti + 1;
	    if (tl > 1) i = verify (&tp[ti+1],tl-1,'.')+1;
	    if (i <= 0) i = tl;
	    ti += i;
	    if (ti <= te && tp[ti] == '*') {
	        type = 2;
		ti++;
		i--;
	    }

	    if (i > 0) if (!builder (5,i)) goto fatal;
	    if (type != 2) type = 3;
	    break;
	case '*':
	    if (l <= 0) {
		printf ("Invalid use of * in regular expression.\n");
		goto fatal;
	    }
	    l--;
	    ti++;
	    if (!end_subexpression()) goto fatal;
	    if (type == 2) break;
	    if (regl >= 0 && regl == last_star) {
		if (reg_info[last_star].search_char == reg[st]) break;
	    }
	    if (!builder (4,0)) goto fatal;
	    last_star = regl;
	    reg_info[last_star].search_char = reg[st];
	    type = 3;
	    break;
	case '\\':
	    ti++;
	    if (st+l >= 131) {
long_string:
		printf ("Regular expression is too long.\n");
		goto fatal;
	    }
	    reg[st+l]=tp[ti++];
	    l++;
	    break;
	case '$':
	default:
	    if (ti == te && ch == '$') {
		omit_newline = TRUE;
		ch = NL;
	    }
	    if (st+l >= 131) goto long_string;
	    reg[st+l]=ch;
	    ti++;
	    l++;
	    break;
	}
    }

    /* End of parsing loop */
    if (!end_subexpression()) goto fatal;
    if (type == 2) if (!builder (2,0)) goto fatal;

match:
    if (fe == -1 || fi > fe) goto fail;

	/* We'll try to do this without moving the gap... */

    for (i=0;i<=regl;i++) {
	reentry_point[i] = -1;
    }

restart_search:
    match_start[0] = fi;
    i = 0;
    st = 0;
    te = fi - 1;

search_loop:
    tl = fe - fi + 1;
    l = reg_info [i].len;

    DEBUG(5,"Search loop - type = %d\n", reg_info[i].search_type);
    if (reg_info [i].search_type == 0) {
	if (l > tl) goto fail;
	j = buf_index (fi, tl, reg, l);
	if (j < 0) goto fail;
	st += l;
	goto found_first_match;

    }
    else if (reg_info [i].search_type == 1) {
	if (fi >= 1) {
	   if (!newlinep (fi - 1)) {
		j = index_nl (fi, tl) + 1;
		if (j <= 0) goto fail;
		fi += j;
		tl -= j;
		if (tl <= 0) goto fail;
	   }
	}
	st = st + l + 1;
	j = 0;
	if (l == 0) goto found_first_match;
	if (l > tl) goto fail;
	if (!buf_match (fi, &reg[1], l)) {
	    j = buf_index (fi, tl, reg, l+1) + 1;
	    if (j <= 0) goto fail;
	}

found_first_match:
	match_start [0] = fi + j;
	goto found_field;
    }
    else if (reg_info[i].search_type == 2) {
	if (l == 0) {
	    te = fi + index_nl (fi, tl) - 1;
	    if (te < fi - 1) te = fe;
	    goto next_field;
	}
	if (l > tl) goto fail_reset;
	j = buf_index (fi, tl, &reg[st], l);
	if (j < 0) goto fail_reset;
	te = 0;
	if (j > 0) te = index_nl (fi, j) + 1;
	if (te > 0) {
	    fi += te;
	    reentry_point [i] = -1;
	    goto restart_search;
	}
	reentry_point [i] = fi + j + 1;
	st += l;
	goto found_field;
    }
    else if (reg_info[i].search_type == 3) {
	if (l > tl) goto fail_retry;
	j = 0;
	if (!buf_match (fi, &reg[st], l)) goto fail_retry;
	st += l;
	goto found_field;
    }
    else if (reg_info[i].search_type == 4) {
	reentry_point [i] = -1;
	if (tl <= 0) goto next_field;
	match_start [i] = fi;
	l = buf_verify (fi, tl, reg_info[i].search_char);
	if (l < 0) l = tl;
	if (l == 0) goto next_field;
	reentry_point [i] = fi + l - 1;
	j = 0;
	goto found_field;
    }
    else {
	if (tl < 1) goto fail_retry;
	j = 0;
	if (index_nl (fi, l) >= 0) goto fail_retry;

found_field:
	DEBUG(5,"Found field @ %d",fi+j);
	DEBUG(5,"  after skipping %d\n", j);
	fi = fi + j + l;
	te = fi - 1;

next_field:
	i++;
	if (i <= regl) goto search_loop;
	if (omit_newline) te--;
	*ami = match_start [0];
	DEBUG(5,"search_file success - *ami = %d\n", *ami);
	*ame = te;
	*auparrow_flag = uparrow_flag;
	*adollar_flag = omit_newline;
	*acode = FOUND;
	return;
    }


fail_reset:
    reentry_point [i] = -1;

fail_retry:
    DEBUG(5,"fail_retry; level = %d\n", i);
    i--;
    if (i < 0) {
	match_start[0]++;
	if (match_start[0] > fe) {
fail:
	    *acode = NOT_FOUND;
	    return;
	}
	fi = match_start[0];
	goto restart_search;
    }
    fi = reentry_point [i];
    if (fi == -1) goto fail_retry;
    st = reg_info [i].start;
    if (reg_info[i].search_type == 2) goto search_loop;
    if (reentry_point[i] < match_start [i]) goto fail_reset;
    reentry_point[i]--;
    goto next_field;
}
/***
 *	newlinep (pos)
 *	int pos;
 *
 *	Procedure to determine if a given character in the buffer
 *	is a newline.  It does so without revealing knowledge of the
 *	gap to the caller.
 *
 */
newlinep(pos)
int pos;
{
    register char *sp;

    sp = buf + pos;
    if (sp >= gap_start) sp+= (gap_end - gap_start);

    return (*sp == NL);
}
/***
 *
 *	index_nl (pos, len)
 *	int pos,len;
 *
 *	index_nl - This function looks at the buffer for the first NL
 *	after the buffer coordinate pos.  If no NL is found within 'len'
 *	characters, -1 is returned.
 *
 */
index_nl(pos,len)
int pos,len;
{
    register char *sp;

    DEBUG(5,"index_nl - pos = %d", pos);
    sp = buf + pos;
    if (sp >= gap_start) sp += (gap_end - gap_start);
    while (len-- > 0 && *sp != NL)
	if (++sp == gap_start)
	    sp = gap_end;

    if (len < 0) {
	DEBUG(5, "   Not found.\n", 0);
	return (-1);
    }

    if (sp >= gap_start)		/* adjust gap */
	sp -= (gap_end - gap_start);

    DEBUG(5,"  found - %d\n", sp-buf-pos);
    return (sp-buf-pos);		/* index of NL */
}
/***
 *
 *	buf_index (pos, len, strp, strl)
 *	int pos, len, strl;
 *	char *strp;
 *
 *	buf_index - Procedure to perform the general index function while
 *	ignoring the existence of the buffer gap.
 *
 */
buf_index(pos,len,strp,strl)
int pos,len, strl;
char *strp;
{
    char *bp, *ep, *sp, *tp;
    int sl;

    DEBUG (5, "buf_index - pos = %d", pos);
    DEBUG (5, "  len = %d", len);
    bp = buf + pos;
    if (bp >= gap_start) bp += (gap_end - gap_start);
    ep = buf + pos + len - strl;		/* end pointer */
    if (ep >= gap_start) ep += (gap_end - gap_start);

    while (bp <= ep) {
	for (sp = strp, sl = strl, tp = bp; sl-- > 0; sp++,tp++) {
	   if (tp == gap_start) tp = gap_end;
	   DEBUG(7,"*sp=%c",*sp);
	   DEBUG(7," *tp=%c",*tp);
	   if (*sp != *tp) break;
	}

	if (sl < 0) {				/* found string */
	    if (bp >= gap_start) bp -= (gap_end - gap_start);
	    DEBUG(5,"  Found @ %d\n", bp-buf-pos);
	    return (bp - buf - pos);
	}
        bp++;
	if (bp == gap_start) bp = gap_end;
    }

    DEBUG(5,"   NOT FOUND\n",0);
    return (-1);				/* not found */
}
/***
 *	buf_match (pos, str, len)
 *	int pos, len;
 *	char *str;
 *
 *	buf_match - Procedure to check if a piece of the buffer matches
 *	a given string.  As usual, the idea is to disguise the fact that
 *	there's a big gap in the buffer
 *
 */
buf_match(pos,str,len)
int pos, len;
char *str;
{
    DEBUG(5, "buf_match:\n", 0);
    return (buf_index (pos, len, str, len) != -1);	/* steal from buf_index */
}
/***
 *	buf_verify (pos, len, ch)
 *	int pos, len;
 *	char ch;
 *
 *	buf_verify - Function to skip over characters that is not ch
 *	paying attention to the gap
 *
 */
buf_verify(pos,len,ch)
int pos,len;
char ch;
{
    char *bp, *ep;

    bp = buf + pos;
    if (bp >= gap_start) bp += (gap_end - gap_start);
    ep = buf + pos + len - 1;
    if (ep >= gap_start) ep += (gap_end - gap_start);

    while (len-- > 0) {
	if (bp == gap_start) bp = gap_end;
	if (*bp != ch) {			/* found non-occurrence */
	   if (bp >= gap_start) bp -= (gap_end - gap_start);
	   return (bp - buf - pos);
	}
	bp++;
	if (bp == gap_start) bp = gap_end;
    }

    return (-1);				/* all ch's */
}
/***
 *	end_subexpression ()
 *
 *	end_subexpression - handles previous end_subexpression found.  Uses
 *	global variable type to build any built-up strings.  Also used
 *	for optimizing a*.* to .* and a*aaa to aaaa*.
 *
 *	Returns false on any type of builder error.
 */
end_subexpression()
{
    int dot_count, ir;

    if (l > 0 || type == 1) {
	DEBUG(5,"end_subexpression - type = %d\n", type);
	if (type == 2) {
	    dot_count = 0;
	    for (ir = regl; ir >= last_string+1; ir--) {
		if (reg_info [ir].search_type == 5)
		     dot_count += reg_info [ir].len;
		else if (reg_info [ir].search_type != 4)
		    goto done_dot_star;
		regl--;
	    }
done_dot_star:
	    if (dot_count > 0) if (!builder (5, dot_count)) return (FALSE);
	    last_star = 0;
	}
	if (last_string == regl - 1 && last_star == regl) {
	    ir = verify (&reg[st],l,reg_info[last_star].search_char);
	    if (ir < 0) ir = 1;
	    if (ir > 0) {
		if (last_string == -1) {
		    last_string = 0;
		    reg_info [1].search_char = reg_info [0].search_char;
		    reg_info [1].len = reg_info [0].len;
		    reg_info [1].start = reg_info [0].start;
		    reg_info [1].search_type = reg_info [0].search_type;
		    last_star++;
		    reg_info [1].start = ir + 1;
		    reg_info [1].search_type = 0;
		    regl = 1;
		}
		reg_info[last_string].len += ir;
		st += ir;
		l -= ir;
		if (l == 0) return (TRUE);
	    }
	}

	if (!builder (type, l)) return (FALSE);
	type = 3;
	last_string = regl;
	st += l;
	l = 0;
    }

    return (TRUE);
}
/***
 *	builder (id, size)
 *	int id, size
 *
 *	builder - Thing to push an entry onto the parse stack.  Returns
 *	FALSE on overflow.
 *
 */
builder(id,size)
int id,size;
{
    DEBUG(5, "Builder - type = %d", id);
    DEBUG(5, "   len = %d\n", size);
    if (regl >= 19) {
	printf ("Regular expression is too complex.\n");
	return  (FALSE);
    }
    regl++;
    reg_info[regl].search_type = id;
    reg_info[regl].len = size;
    reg_info[regl].start = st;

    return (TRUE);
}
/***
 *	verify (strp, strl, ch)
 *	char *strp, ch;
 *	int strl;
 *
 *	verify - skip over characters in string that are ch
 *
 */
verify(strp,strl,ch)
char *strp, ch;
int strl;
{
    char *sp;

    sp = strp;
    while (strl-- > 0)
	if (*sp++ != ch)
	    return (sp - strp - 1);

    return (-1);
}
