import sys

import creole_lexer as cl
from tokens import *

# Set to True to dump raw markup and every lexer token to stderr.
debug = False

# Inline styles whose open and close use the same lexer token (toggled
# on/off by tokenize).
TOGGLE_MAP = {cl.Token.Inline.Bold: BOLD,
              cl.Token.Inline.Italic: ITALIC,
              cl.Token.Inline.Monospace: MONOSPACE,
              cl.Token.Inline.Superscript: SUPERSCRIPT,
              cl.Token.Inline.Subscript: SUBSCRIPT,
              cl.Token.Inline.Underline: UNDERLINE,
              #cl.Token.Inline.Strike: STRIKE,
              }
# Styles with distinct open/close tokens.
OPEN_MAP = {cl.Token.Open.Codeblock: CODEBLOCK,
            cl.Token.Open.Code: MONOSPACE}
CLOSE_MAP = {cl.Token.Close.Codeblock: CODEBLOCK,
             cl.Token.Close.Code: MONOSPACE}
# Tokens that map directly to a single output entity.
ENTITY_MAP = {cl.Token.Hrule: HRULE,
              cl.Token.LineBreak: LINEBREAK}
# Link/image delimiters.
LINK_START_MAP = {cl.Token.Start.Link: LINK,
                  cl.Token.Start.Image: IMAGE}
LINK_END_MAP = {cl.Token.End.Link: LINK,
                cl.Token.End.Image: IMAGE}
# Line-leading markers for list items and blockquotes.
LIST_MAP = {cl.Token.List.Unordered: UNORDERED_ITEM,
            cl.Token.List.Ordered: ORDERED_ITEM,
            cl.Token.Blockquote: BLOCKQUOTE_LINE}
# Table cell markers ('|=' heading vs '|' plain cell).
TABLE_MAP = {cl.Token.Table.Heading: TABLE_HEADING,
             cl.Token.Table.Cell: TABLE_CELL}
# Smart-typography substitutions: dashes, curly quotes, ellipsis, arrows.
PUNCT_MAP = {cl.Token.EmDash: u'\u2014',
             cl.Token.EnDash: u'\u2013',
             cl.Token.OpenQuote: u'\u201C',
             cl.Token.CloseQuote: u'\u201D',
             cl.Token.OpenSingleQuote: u'\u2018',
             cl.Token.Ellipsis: u'\u2026',
             cl.Token.DoubleBoth: u'\u21D4',
             cl.Token.SingleBoth: u'\u2194',
             cl.Token.DoubleRight: u'\u21D2',
             cl.Token.SingleRight: u'\u2192',
             cl.Token.DoubleLeft: u'\u21D0',
             cl.Token.SingleLeft: u'\u2190',
             }
# U+223C TILDE OPERATOR, used for escaped digits (~1 etc.).
TILDE_OP = u'\u223C'
TEXT = set((cl.Text, cl.Punctuation))
NL = (cl.Token.NL,)
SP = (cl.Token.SP,)
# Tokens that terminate any open (non-macro) environments.
# NOTE: the `keys() + [...]` concatenation relies on Python 2 dict.keys()
# returning a list.
FLUSH_STACK_BEFORE = set(LIST_MAP.keys() +
                         [cl.Token.ParaBreak, cl.Token.Heading,
                          cl.Token.Macro.Start.Close, cl.Token.EOF])
# Tokens before which pending trailing whitespace is discarded.
SKIP_WHITESPACE_BEFORE = set(LIST_MAP.keys() + TABLE_MAP.keys()
                             + [cl.Token.Heading, cl.Token.EOF,
                                cl.Token.Table.End, cl.Token.List.End])
# Structural lexer tokens with no output of their own.
IGNORE = (cl.Token.Table.End, cl.Token.List.End)

def token_stream(markup):
    """Lex *markup*, yielding (token-type, text) pairs plus a final EOF.

    The input is padded so it always ends with a blank line, which the
    lexer needs in order to terminate trailing constructs.  Tokens of
    type Nothing are filtered out, and a synthetic EOF token is
    appended at the end.
    """
    # Normalize the tail to exactly end in '\n\n'.
    if not markup.endswith('\n'):
        markup += '\n\n'
    elif not markup.endswith('\n\n'):
        markup += '\n'
    if debug: print >>sys.stderr, repr(markup)

    for tok in cl.CreoleLexer().get_tokens(markup):
        if tok[0] != cl.Token.Nothing:
            yield tok
    yield (cl.Token.EOF, '')

TOKEN_STREAM_CACHE = {}
# This cache has only marginal benefit... with performance improvements
# deeper, it wouldn't make sense.
def caching_token_stream(markup):
    """Return the token list for *markup*, memoized per input string."""
    try:
        return TOKEN_STREAM_CACHE[markup]
    except KeyError:
        tokens = list(token_stream(markup))
        TOKEN_STREAM_CACHE[markup] = tokens
        return tokens

def tokenize(markup, error_func=lambda:None):
    """Turn creole *markup* into a stream of semantic token objects.

    Drives the lexer token stream through a state machine that pairs
    style toggles, accumulates link destinations and macro
    names/arguments, manages table rows and cells, and applies smart
    typography (curly quotes, en/em dashes, implicit http(s) links).

    Yields Start, End, Entity and Text objects from the tokens module.
    *error_func* is called (with no arguments) once per markup error;
    the error itself is also emitted in-stream as an ERROR entity.
    """
    def gen_error(msg):
        # Notify the caller and represent the error in the output stream.
        error_func()
        return Entity(ERROR, msg)

    text_bits = []         # pending literal text, flushed as one Text node
    stack = []             # open [style, arg] pairs awaiting their End
    linkdest = None        # link/image target being accumulated, or None
    macroname = None       # macro name being accumulated, or None
    argstr = None          # macro argument text being accumulated, or None
    eatspaces = False      # swallow whitespace until real content appears
    eatspacesnext = False  # arm eatspaces after the upcoming text token
    implicit_link = False  # scheme ('http'/'https') of a bare URL, or False
    open_cell = None       # style of the open table cell; True while its
                           # Start is still delayed; None outside a row
    # Delayed until we see something other than eaten spaces, dropped if
    # that's a newline or EOF
    delayed_token = None
    open_quote_next = True # the next quote should render as an opener
    last_quote = None      # token type of the last closing quote seen

    for typ, toktext in caching_token_stream(markup):
        if debug: print >>sys.stderr, 'TOKEN:', (typ, toktext)

        # A heading runs to end of line: strip trailing whitespace and
        # '=' decoration from the accumulated text, then close it.
        if (stack and stack[-1][0] == HEADING
            and typ in NL + (cl.Token.ParaBreak, cl.Token.EOF)):
            while text_bits and not text_bits[-1].strip():
                text_bits.pop()
            while text_bits and text_bits[-1] == '=':
                text_bits.pop()
            while text_bits and not text_bits[-1].strip():
                text_bits.pop()
            if text_bits:
                yield Text(''.join(text_bits))
                text_bits = []
            yield End(HEADING, stack[-1][1])
            stack.pop()

        # A bare http(s) URL ends at the first non-text token; a single
        # trailing punctuation character (other than '/') is treated as
        # prose rather than part of the URL.
        if implicit_link and typ not in TEXT and typ != cl.Token.Escape:
            path = ''.join(text_bits)
            text_bits = []
            # Guard `path` being empty: "http://" followed directly by a
            # non-text token would otherwise raise IndexError.
            if path and not path[-1].isalnum() and path[-1] != '/':
                text_bits.append(path[-1])
                path = path[:-1]
            yield Start(MONOSPACE)
            yield Entity(LINK, implicit_link + '://' + path)
            yield End(MONOSPACE)
            implicit_link = False
        # Emit a delayed table-cell Start only if the line didn't end
        # right after the marker (a trailing '|' merely closes the row).
        if delayed_token is not None and typ not in SP:
            if typ not in NL and typ != cl.Token.EOF:
                yield Start(delayed_token)
                open_cell = delayed_token
            delayed_token = None

        if typ in TEXT:
            text = None

            if argstr is None and macroname is None:
                # We only muck with quote state in here because macro
                # names/args don't affect smart quote behavior.
                last_quote = None
                # These are the punct that we expect an open quote could
                # immediately follow.
                open_quote_next = (toktext in ('-', '/', '(', '[', '{'))
            if linkdest is None and argstr is None and macroname is None:
                # Weird text cases: '-' right after a digit becomes an
                # en dash; a pending en dash followed by a non-digit is
                # demoted back to '-' (digit range) or promoted to an
                # em dash.
                if debug: print >>sys.stderr, 'weird text', toktext, text_bits
                if (toktext == '-' and len(text_bits) > 0
                    and text_bits[-1].isdigit()):
                    text = u'\u2013'
                elif (len(text_bits) > 0 and text_bits[-1] == u'\u2013'
                      and not toktext.isdigit()):
                    if len(text_bits) > 1 and text_bits[-2].isdigit():
                        text_bits[-1] = '-' #u'\u2014'
                    else:
                        text_bits[-1] = u'\u2014'
            if text is None:
                text = toktext
        elif typ == cl.Token.SingleQuote:
            if debug: print >>sys.stderr, 'oqn', open_quote_next
            # Open quote only in a plausible opening position and not
            # immediately after the other kind of closing quote.
            if open_quote_next and (last_quote is None or last_quote == typ):
                text = u'\u2018'
            else:
                text = u'\u2019'
                last_quote = typ
        elif typ == cl.Token.DoubleQuote:
            if open_quote_next and (last_quote is None or last_quote == typ):
                text = u'\u201C'
            else:
                text = u'\u201D'
                last_quote = typ
        elif typ in PUNCT_MAP:
            open_quote_next = True
            if linkdest is None and argstr is None and macroname is None:
                # Weird punct cases: an en dash after a space, or after
                # a non-digit, was really meant as an em dash.
                if (typ == cl.Token.EnDash and len(text_bits) > 0
                    and text_bits[-1] == ' '):
                    if debug: print >>sys.stderr, 'Emdashing'
                    text_bits.pop()
                    typ = cl.Token.EmDash
                    eatspacesnext = True
                elif (typ == cl.Token.EnDash and len(text_bits) > 0
                    and not text_bits[-1].isdigit()):
                    if debug: print >>sys.stderr, 'Emdashing2'
                    typ = cl.Token.EmDash
                    eatspacesnext = True

            text = PUNCT_MAP[typ]
            if debug: print >>sys.stderr, typ, repr(text)
        elif typ in SP or (typ in NL and not open_cell):
            open_quote_next = True
            if macroname is not None and argstr is None:
                # The first space inside a macro separates name from args.
                argstr = ''
                continue
            elif eatspaces:
                continue
            elif linkdest is None and argstr is None and macroname is None:
                # Weird space cases: whitespace after a pending en dash
                # promotes it to an em dash.
                if (len(text_bits) > 0 and text_bits[-1] == u'\u2013'
                    and (len(text_bits) > 1 or typ not in NL)):
                    if debug: print >>sys.stderr, 'Weird space case'
                    text_bits[-1] = u'\u2014'
                    continue
            text = toktext
        elif typ == cl.Token.Escape:
            txt = toktext
            assert len(txt) == 2, repr(txt)
            # Inside a link, escaped alphanumerics stay verbatim; '~d'
            # elsewhere becomes a tilde-operator prefix, '~X' (upper)
            # stays literal, anything else drops the tilde.
            if (txt[1].isalnum()
                and (linkdest is not None or implicit_link)):
                text = txt
            elif txt[1].isdigit():
                text = TILDE_OP + txt[1]
            elif txt[1].isupper():
                text = txt
            else:
                text = txt[1]
        elif typ == cl.Token.LinkImplicitInfix:
            # '://' immediately after 'http'/'https' starts an implicit
            # link; otherwise it is just text.
            if text_bits and text_bits[-1] in ('http', 'https'):
                implicit_link = text_bits.pop()
                if text_bits:
                    yield Text(''.join(text_bits))
                text_bits = []
                continue
            else:
                text = toktext
        else:
            eatspaces = False
            last_quote = None
            # Not a text node or something infix, so flush text.
            if typ in SKIP_WHITESPACE_BEFORE:
                while text_bits and not text_bits[-1].strip():
                    text_bits.pop()
            if text_bits:
                yield Text(''.join(text_bits))
                text_bits = []
            text = None

            # Close the open table cell and row at end of line/para/EOF
            # (or when a closing macro interrupts the row).  Checking
            # open_cell first avoids indexing an empty stack.
            if open_cell and (typ in NL + (cl.Token.EOF, cl.Token.ParaBreak)
                              or (typ == cl.Token.Macro.Start.Close
                                  and stack[-1][0] == TABLE_ROW)):
                if open_cell is not True:
                    yield End(open_cell)
                s, a = stack.pop()
                assert s == TABLE_ROW, (s, a, stack)
                yield End(TABLE_ROW)
                open_cell = None
                if typ in NL:
                    continue

            if typ in FLUSH_STACK_BEFORE:
                # If an environment ender, flush stack (open macros
                # survive the flush).
                while stack:
                    if stack[-1][0] == MACRO:
                        break
                    esty, earg = stack.pop()
                    yield End(esty, earg)

            if typ == cl.Token.EOF:
                assert macroname is None, macroname
                break
            elif typ == cl.Token.ParaBreak:
                yield Entity(ENV_BREAK)
                open_quote_next = True
            elif typ in TOGGLE_MAP:
                # Inline styles toggle: close if already open, else open.
                sty = TOGGLE_MAP[typ]
                if stack and stack[-1][0] == sty:
                    yield End(sty, stack[-1][1])
                    stack.pop()
                else:
                    stack.append([sty, None])
                    yield Start(sty)
            elif typ in OPEN_MAP:
                sty = OPEN_MAP[typ]
                stack.append([sty, None])
                yield Start(sty)
            elif typ == cl.Token.Heading:
                # Heading level is the number of '=' characters.
                lev = len(toktext.strip())
                stack.append([HEADING, lev])
                yield Start(HEADING, lev)
                eatspaces = True
                open_quote_next = True
            elif typ in CLOSE_MAP:
                sty = CLOSE_MAP[typ]
                if stack and stack[-1][0] == sty:
                    yield End(sty, stack[-1][1])
                    stack.pop()
                else:
                    # Close with no matching open is a markup error.
                    if debug: print >>sys.stderr, 'error!', sty, stack
                    yield gen_error(toktext)
            elif typ in ENTITY_MAP:
                yield Entity(ENTITY_MAP[typ])
            elif typ in LIST_MAP:
                # Item depth is the number of bullet/quote characters.
                item = LIST_MAP[typ]
                depth = len(toktext.strip())
                if item is not None:
                    stack.append([item, depth])
                    yield Start(item, depth)
                eatspaces = True
            elif typ in TABLE_MAP:
                # A new cell marker ends the previous cell, or starts a
                # row if this is the first cell on the line.
                if open_cell:
                    yield End(open_cell)
                else:
                    stack.append([TABLE_ROW, None])
                    yield Start(TABLE_ROW)
                cell = TABLE_MAP[typ]
                # BUG FIX: this compared against cl.Token.TableCell,
                # which names a different, auto-created token and never
                # matched -- so plain cells were never delayed and a
                # trailing '|' produced an empty cell.
                if typ == cl.Token.Table.Cell:
                    # Delay because we drop it at end of line
                    delayed_token = cell
                    open_cell = True
                else:
                    yield Start(cell)
                    open_cell = cell
                eatspaces = True
            elif typ in LINK_START_MAP:
                stack.append([LINK_START_MAP[typ], None])
                linkdest = ''
            elif typ == cl.Token.LinkPipe:
                # '|' inside a link: destination done, label follows.
                assert linkdest is not None
                yield Start(stack[-1][0], linkdest)
                stack[-1][1] = linkdest
                linkdest = None
            elif typ in LINK_END_MAP:
                sty = LINK_END_MAP[typ]
                if stack and stack[-1][0] == sty:
                    if linkdest is None:
                        # pipe already hit
                        yield End(sty, stack[-1][1])
                    else:
                        # No label given: the whole link is one entity.
                        yield Entity(sty, linkdest)
                        linkdest = None
                    stack.pop()
                else:
                    if debug: print >>sys.stderr, 'lem error!', sty, stack
                    yield gen_error(toktext)
            elif typ == cl.Token.Macro.Start.Open:
                macroname = ''
                macroend = False
            elif typ == cl.Token.Macro.Start.Close:
                macroname = ''
                macroend = True
            elif typ == cl.Token.Macro.End.Inline:
                if macroend:
                    # '<</name>>' closes a previously opened macro.
                    assert not argstr, (macroname, argstr)
                    if stack and stack[-1] == [MACRO, macroname]:
                        yield End(MACRO, macroname)
                        stack.pop()
                    else:
                        if debug: print >>sys.stderr, 'merror!', stack
                        yield gen_error('<</%s>>' % macroname)
                else:
                    stack.append([MACRO, macroname])
                    assert macroname is not None
                    yield Start(MACRO, (macroname, argstr))
                macroname = None
                argstr = None
            elif typ in (cl.Token.Macro.End.Entity,
                         cl.Token.Macro.End.Broken):
                # Self-closing (or unterminated) macro.
                yield Entity(MACRO, (macroname, argstr))
                macroname = None
                argstr = None
                open_quote_next = False
            elif typ in (cl.Token.Macro.TeX.Open,
                         cl.Token.Macro.TeX.ContentOpen):
                # '\name{' or '{\name ' style TeX macro openers.
                assert macroname is None
                if typ == cl.Token.Macro.TeX.Open:
                    assert toktext.startswith('\\') and toktext.endswith('{')
                    mn = toktext[1:-1]
                else:
                    assert toktext.startswith('{\\')
                    if toktext.endswith(' '):
                        mn = toktext[2:-1]
                    else:
                        mn = toktext[2:]
                stack.append([MACRO, mn])
                yield Start(MACRO, (mn, None))
                open_quote_next = True
            elif typ == cl.Token.Macro.TeX.Entity:
                # Bare '\name' (optionally space-terminated).
                assert macroname is None
                assert toktext.startswith('\\')
                if toktext.endswith(' '):
                    yield Entity(MACRO, (toktext[1:-1], None))
                else:
                    yield Entity(MACRO, (toktext[1:], None))
                open_quote_next = False
            elif typ == cl.Token.Macro.TeX.SingleChar:
                # '\Xc': a one-character macro applied to one character.
                assert macroname is None
                assert argstr is None
                assert len(toktext) == 3 and toktext.startswith('\\')
                yield Start(MACRO, (toktext[1], None))
                yield Text(toktext[2])
                yield End(MACRO, toktext[1])
                open_quote_next = False
            elif typ == cl.Token.Macro.TeX.Close:
                if debug: print >>sys.stderr, "TEX_MACRO_END", stack
                if stack and stack[-1][0] == MACRO:
                    sty, name = stack.pop()
                    yield End(MACRO, name)
                else:
                    yield gen_error(toktext)
                open_quote_next = False
            elif typ == cl.Token.Macro.TeX.ArgBreak:
                open_quote_next = True
                yield Entity(MACRO, ('break', None))
            else:
                assert typ in IGNORE, (typ, toktext)
            continue
        assert text is not None

        # Text node handling: route the literal into whichever
        # accumulator is currently active.
        if eatspacesnext:
            eatspaces = True
            eatspacesnext = False
        else:
            eatspaces = False
        if linkdest is not None:
            linkdest += text
        elif argstr is not None:
            argstr += text
        elif macroname is not None:
            macroname += text
        else:
            text_bits.append(text)
            if debug: print >>sys.stderr, text_bits
