import re

from .formats import BaseFormat, placeholder
from .tokens import *

# Escape table for characters that are special to LaTeX.  The pairs are
# applied one after another, top to bottom, so no replacement may produce a
# character that a later pair would rewrite in a harmful way.
QUOTE_MAP = (
    # Backslash almost certainly has to be first: every later replacement
    # introduces backslashes of its own.
    ('\\', r'\textbackslash '),
    ('{', r'\{'),
    ('}', r'\}'),
    # Replacements that use {}-delimited arguments must stay below this
    # point, otherwise their braces would be escaped as well.
    ('%', r'\%'),
    ('&', r'\&'),
    ('$', r'\$'),
    ('#', r'\#'),
    ('_', r'\_'),
    ('~', r'\~{}'),
    ('^', r'\^{}'),
    ('<', r'$<$'),
    ('>', r'$>$'),
    # Brackets are wrapped in groups so they are not read as optional
    # arguments: '[' right after \item, ']' inside one (e.g. \item[{]}]).
    ('[', r'{[}'),
    (']', r'{]}'),
    # Straight quotes get explicit glyph commands so they are not "smartened".
    ("'", r'\textquotesingle{}'),
    ('"', r'\textquotedbl{}'),
    # A newline becomes a space so that \n\n is not a paragraph break.
    ('\n', r' '),
)

FONT_SUBSTS = [
#    (re.compile(ur'([\u2700-\u27BF]+)'), r'{\\dingbat \1}'),
    (re.compile(ur'([\u2200-\u22FF\u25A0-\u27BF]+)'), r'{\\sffamily \1}'),
    (re.compile(ur'(['
                ur'\u2E80-\u33FF'
                ur'\u3400-\u4DB5'
                ur'\u4E00-\u9FCC'
                ur'\uA000-\uA4FF'
                ur'\uA960-\uA97F'
                ur'\uD7B0-\uD7FF'
                ur'\uF900-\uFAD9'
                ur'\uFE10-\uFE6F'
                ur'\uFF00-\uFFEF]+)'), r'{\\japanese \1}'),
]
_NON_BMP = (
    (u'\U00010000', u'\U0001007F', 'japanese'),
    (u'\U0001B000', u'\U0001B0FF', 'japanese'),
    (u'\U0001D100', u'\U0001D1FF', 'japanese'),
    (u'\U0001D360', u'\U0001D37F', 'japanese'),
    (u'\U00010000', u'\U000100F7', 'japanese'),
    (u'\U0001F000', u'\U0001F02F', 'japanese'),
    (u'\U0001F100', u'\U0001F7FF', 'japanese'),
    (u'\U00020000', u'\U0002B81F', 'japaneseext'),
    (u'\U0002F800', u'\U0002FA1F', 'japanese'),
)
# We can't match these in a clean way, because standard python
# sucks at matching non-BMP Unicode.  (We do it by
# explicitly matching surrogate pairs on OS X and doing it
# properly on scripts).  (You could be fine matching
# either, except the first is an invalid regexp on mac.)
def sub_non_bmp(match):
    c = match.group(1)
    for start, end, font in _NON_BMP:
        assert len(c) == len(start)
        if start <= c and c <= end:
            return ur'{\%s %s}' % (font, c)
    return c
try:
    FONT_SUBSTS.append((re.compile(ur'([\U00010000-\U0010FFFF])'),
                        sub_non_bmp))
except re.error:
   FONT_SUBSTS.append((re.compile(ur'([\uD800-\uDFFF][\uD800-\uDFFF])'), 
                       sub_non_bmp))


# Tokens opened as a one-argument command: \<name>{ ... }
CMD_MAP = {
    SUPERSCRIPT: 'textsuperscript',
    SUBSCRIPT: 'textsubscript',
    UNDERLINE: 'uline',
    STRIKE: 'sout',
    FOOTNOTE: 'footnote',
}

# Tokens opened as a declaration inside a group: {\<name> ... }
BLOCK_MAP = {
    BOLD: 'bfseries',
    ITALIC: 'em',
    MONOSPACE: 'ttfamily',
}

# Tokens opened as an environment: \begin{<name>} ... \end{<name>}
ENV_MAP = {
    CENTER: 'thincenter',
    RIGHT: 'thinright',
}

# Sectioning command name for each HEADING argument (looked up by start()).
LATEX_HEADINGS = {
    1: 'chapter',
    2: 'section',
    3: 'subsection',
    4: 'subsubsection',
    5: 'subsubsubsection',
    6: 'paragraph',
}

# Font size declaration for each SIZE argument (looked up by start());
# 0 maps to the normal size.
LATEX_SIZES = {
    -4: 'tiny',
    -3: 'scriptsize',
    -2: 'footnotesize',
    -1: 'small',
    0: 'normalsize',
    1: 'large',
    2: 'Large',
    3: 'LARGE',
    4: 'huge',
    5: 'Huge',
}

# Environment name used for each list-like token (\begin{...} / \end{...}).
LISTMAP = {
    ORDERED: 'enumerate',
    UNORDERED: 'itemize',
    BLOCKQUOTE: 'bazquote',
}

# Factor applied to plain numeric image heights/widths before they are
# emitted with a 'pt' unit (presumably pixels, with 16px taken as 12pt).
PX_TO_PT = 12.0 / 16.0

def url_to_path(url):
    """Turn a site-absolute URL path into a relative file path.

    ``url`` must start with '/' and must not contain a scheme ('://').
    The escapes %5E, %20 and %2C are rewritten to '--', '_' and ':'
    respectively, every '.' becomes '-', and the result is prefixed with
    '.' so that it is relative to the current directory.

    Raises AssertionError when the input breaks those requirements or when
    any other percent escape is left over in the result.
    """
    assert '://' not in url
    assert url.startswith('/')
    # Order matters: this is the same sequence of replacements, applied one
    # after another to the URL itself (the leading '.' is added afterwards).
    rewrites = (
        ('%5E', '--'),
        ('%20', '_'),
        ('%2C', ':'),
        ('.', '-'),
    )
    mangled = url
    for old, new in rewrites:
        mangled = mangled.replace(old, new)
    path = '.' + mangled
    assert '%' not in path, path
    return path

class LaTeXFormat(BaseFormat):
    """Formatter that emits LaTeX markup for the token types in .tokens.

    text(), start(), end() and entity() are generators; each yields the
    output fragments for a single event.  The instance keeps a little state
    between events: whether a verbatim block is open, and bookkeeping for
    the table that is currently being emitted.
    """

    def __init__(self):
        # True while a CODEBLOCK is open; escape() then returns text as-is.
        self.verbatim = False
        # True between the start of a TABLE_ROW and its first cell/heading.
        self.table_row_start = None
        # Cells seen so far in the current row, and the widest row seen so
        # far in the current table (used to pad the column specs).
        self.row_length = 0
        self.longest_row = 0
        # Whether the current table draws \hline rules.  start(TABLE) sets
        # it; it is initialised here so end(TABLE_ROW) never hits a missing
        # attribute.
        self.tableborder = False

    def escape(self, text):
        """Return ``text`` escaped for LaTeX (unchanged inside a CODEBLOCK)."""
        if self.verbatim:
            return text
        for bad, repl in QUOTE_MAP:
            text = text.replace(bad, repl)
        # Some characters need special font handling
        for regexp, subst in FONT_SUBSTS:
            text = regexp.sub(subst, text)
        return text

    def text(self, text):
        """Yield the escaped form of a run of literal text."""
        yield self.escape(text)

    @staticmethod
    def _image_length(value):
        """Return an image dimension as a LaTeX length string.

        Objects with a to_str() method render themselves; anything else is
        treated as a number, scaled by PX_TO_PT and given a 'pt' unit.
        """
        if hasattr(value, 'to_str'):
            return value.to_str()
        return str(value * PX_TO_PT) + 'pt'

    def _include_graphics(self, arg):
        """Return the opening markup for an IMAGE token described by ``arg``.

        ``arg`` must provide 'url' and may provide 'height', 'width' and
        'force'.  The markup ends with an open \\ignore{ group, which
        end(IMAGE) closes.
        """
        # This only works because of the magic image fetching logic
        # that bazki.latex does, which also uses url_to_path().
        path = url_to_path(arg['url'])
        params = []
        for key in ('height', 'width'):
            if key in arg:
                params.append('%s=%s' % (key, self._image_length(arg[key])))
        if not arg.get('force', False):
            params.append('keepaspectratio')
        paramstr = ('[%s]' % ','.join(params)) if params else ''
        return '\n\\includegraphics%s{%s}\n\\ignore{' % (paramstr, path)

    def start(self, t, arg=None):
        """Yield the LaTeX that opens token ``t``.

        ``arg`` is token specific: the size index for SIZE, the level for
        HEADING, a mapping with 'url' for LINK and IMAGE, and an optional
        mapping with 'mode' / 'border' for TABLE.

        Raises AssertionError (when iterated) for an unknown token type.
        """
        if t in CMD_MAP:
            yield u'\\%s{' % CMD_MAP[t]
        elif t in BLOCK_MAP:
            yield u'{\\%s ' % BLOCK_MAP[t]
        elif t == SIZE:
            yield u'{\\%s ' % LATEX_SIZES[arg]
        elif t in ENV_MAP:
            yield u'\\begin{%s}\n' % ENV_MAP[t]
        elif t in LISTMAP:
            yield u'\\begin{%s}\n' % LISTMAP[t]
        elif t in (ORDERED_ITEM, UNORDERED_ITEM):
            yield u'\\item '
        elif t == BLOCKQUOTE_LINE:
            yield ''
        elif t == PARAGRAPH:
            yield ''
        elif t == LINK:
            yield u'\\href{%s}{' % (self.escape(arg['url']))
        elif t == HEADING:
            yield u'\\%s{' % LATEX_HEADINGS[arg]
        elif t == CODEBLOCK:
            # Text inside the verbatim environment must not be escaped.
            self.verbatim = True
            yield u'\\begin{verbatim}\n'
        elif t == NOINDENT:
            yield u'\n\n\\noindent\n'
        elif t == ERROR:
            yield r'\textbf{\emph{\color{red}'
        elif t == TABLE:
            tabulartype = ''
            if arg:
                if 'mode' in arg and arg['mode'] == 'equal':
                    tabulartype = 'x'
            # Borders are on unless arg explicitly carries a false 'border'.
            self.tableborder = bool(
                not arg or 'border' not in arg or arg['border'])
            self.longest_row = 0
            yield '\\starttable%s\n' % tabulartype
            if self.tableborder:
                yield '\\hline\n'
        elif t == TABLE_ROW:
            self.table_row_start = True
            self.row_length = 0
            yield ''
        elif t == TABLE_CELL:
            self.row_length += 1
            if self.table_row_start:
                self.table_row_start = False
                yield ''
            else:
                # Every cell after the first is preceded by a separator.
                yield r'&'
        elif t == TABLE_HEADING:
            self.row_length += 1
            if self.table_row_start:
                self.table_row_start = False
                yield r'{\bfseries '
            else:
                yield r'& {\bfseries '
        elif t == IMAGE:
            yield self._include_graphics(arg)
        else:
            # Raised explicitly so the check survives python -O.
            raise AssertionError(t)

    def end(self, t, arg=None):
        """Yield the LaTeX that closes token ``t``.

        For TABLE, ``arg`` may carry 'align', 'border', 'specs' and 'mode';
        the column specification is emitted here because the number of
        columns is only known once all rows have been seen.

        Raises AssertionError (when iterated) for an unknown token type.
        """
        if t in CMD_MAP or t in BLOCK_MAP or t == SIZE:
            yield u'}'
        elif t in ENV_MAP:
            yield u'\n\\end{%s}' % ENV_MAP[t]
        elif t in LISTMAP:
            yield u'\\end{%s}\n' % LISTMAP[t]
        elif t in (ORDERED_ITEM, UNORDERED_ITEM, BLOCKQUOTE_LINE):
            yield '\n'
        elif t == PARAGRAPH:
            yield ''
        elif t in (LINK, HEADING):
            yield '}'
        elif t == CODEBLOCK:
            self.verbatim = False
            yield u'\n\\end{verbatim}'
        elif t == NOINDENT:
            yield ''
        elif t == ERROR:
            yield '}}'
        elif t == TABLE:
            tabulartype = ''
            achar = 'l'
            bchar = '|'
            specs = []
            if arg:
                if 'align' in arg:
                    achar = arg['align'][0].lower()
                if 'border' in arg and not arg['border']:
                    bchar = ''
                if 'specs' in arg:
                    # Copy, so padding below does not modify the caller's
                    # list.
                    specs = list(arg['specs'])
                if 'mode' in arg and arg['mode'] == 'equal':
                    # Must mirror the 'x' suffix that start(TABLE) emitted.
                    tabulartype = 'x'
                    achar = achar.upper()
            # Pad the column specs up to the widest row with the default.
            specs.extend([achar] * (self.longest_row - len(specs)))
            yield '\\endtable%s{%s%s%s}\n' % (tabulartype,
                                              bchar, bchar.join(specs), bchar)
        elif t == TABLE_ROW:
            if self.row_length > self.longest_row:
                self.longest_row = self.row_length
            yield u'\\\\\n'
            if self.tableborder:
                yield '\\hline\n'
        elif t == TABLE_CELL:
            yield ''
        elif t == TABLE_HEADING:
            yield '}'
        elif t == IMAGE:
            # Closes the \ignore{ group opened by start(IMAGE).
            yield '}'
        else:
            # Raised explicitly so the check survives python -O.
            raise AssertionError(t)

    def entity(self, t, arg=None):
        """Yield the LaTeX for a self-contained token ``t``.

        For ERROR, ``arg`` is yielded as given between the ERROR opening
        and closing markup; for REF it is passed to placeholder().

        Raises AssertionError (when iterated) for an unknown token type.
        """
        if t == HRULE:
            yield u'\\hrule{}'
        elif t == LINEBREAK:
            yield u'\\\\'
        elif t == ENV_BREAK:
            yield u'\n\n'
        elif t == NOINDENT:
            yield u'\\noindent{}'
        elif t == ERROR:
            for s in self.start(ERROR):
                yield s
            yield arg
            for e in self.end(ERROR):
                yield e
        elif t == REF:
            yield placeholder(arg)
        else:
            # Raised explicitly so the check survives python -O.
            raise AssertionError(t)
