#!/usr/bin/env python
##
##  sexpr.py - by Yusuke Shinyama
##
##  * public domain *
##
##  from http://www.unixuser.org/~euske/python/index.html:
##  The following files are in public domain except where otherwise noted. THESE FILES COME WITH ABSOLUTELY NO WARRANTY.

from abstfilter import AbstractFeeder, AbstractFilter, AbstractConsumer


##  SExprReader
##
class SExprReader(AbstractFilter):
  """Incremental S-expression reader.

  Input is pushed in with feed(); each completed top-level item is handed
  to the next filter through feed_next().  A bare symbol is passed as a
  string, a parenthesized list as a (possibly nested) Python list of strings.

  Usage:

  reader = SExprReader(consumer)
  reader.feed("(this is (sexpr))")
  reader.close()
  """

  COMMENT_BEGIN = ";"
  COMMENT_END = "\n"
  SEPARATOR = " \t\n"
  PAREN_BEGIN = "("
  PAREN_END = ")"
  QUOTE = '"'
  ESCAPE = "\\"

  def __init__(self, next_filter,
               comment_begin=COMMENT_BEGIN,
               comment_end=COMMENT_END,
               separator=SEPARATOR,
               paren_begin=PAREN_BEGIN,
               paren_end=PAREN_END,
               quote=QUOTE,
               escape=ESCAPE):
    """Create a reader that forwards parsed items to next_filter.

    Each remaining argument is a string holding the set of characters
    that play the corresponding syntactic role (tested with "in").
    """
    AbstractFilter.__init__(self, next_filter)
    self.comment_begin = comment_begin
    self.comment_end = comment_end
    self.separator = separator
    self.paren_begin = paren_begin
    self.paren_end = paren_end
    self.quote = quote
    self.escape = escape
    # every character that is not taken literally outside of an escape.
    # (comment_end is left out: it only matters while inside a comment.)
    self.special = comment_begin + separator + paren_begin + paren_end + quote + escape
    self.reset()
    return

  # SExprReader ignores any error and
  # tries to continue as long as possible.
  # if you want to throw an exception instead,
  # override these methods (see StrictSExprReader).

  # called if a redundant close parenthesis is found.
  # i is the offset of the parenthesis within the chunk given to feed().
  def illegal_close_paren(self, i):
    # single-argument parenthesized form prints the same on Python 2 and 3.
    print("Ignore a close parenthesis: %d" % i)
    return

  # called if it reaches the end-of-file while the stack is not empty.
  # i is the number of lists still open, x is the outermost partial list.
  def premature_eof(self, i, x):
    print("Premature end of file: %d parens left, partial=%s" % (i, x))
    return

  # reset the internal states.
  def reset(self):
    """Forget any partially parsed input.  Returns self for chaining."""
    self.incomment = False              # if within a comment.
    self.inquote = False                # if within a quote.
    self.inescape = False               # if within an escape.
    self.sym = ''                       # partially constructed symbol.
    # NOTICE: None != nil (an empty list)
    self.build = None                   # partially constructed list.
    self.build_stack = []     # to store a chain of partial lists.
    return self

  # analyze strings
  def feed(self, tokens):
    """Parse the characters of tokens, continuing from the current state.

    Completed top-level symbols and lists are passed to feed_next().
    Returns self for chaining.
    """
    for (i, c) in enumerate(tokens):
      if self.incomment:
        # within a comment - skip until a comment terminator shows up.
        self.incomment = (c not in self.comment_end)
      elif self.inescape or (c not in self.special):
        # add to the current working symbol
        self.sym += c
        self.inescape = False
      elif c in self.escape:
        # escape: the next character is taken literally.
        self.inescape = True
      elif self.inquote and (c not in self.quote):
        # special characters lose their meaning inside a quote.
        self.sym += c
      else:
        # special character (blanks, parentheses, quote, or comment)
        if self.sym:
          # close the current symbol
          if self.build is None:
            # not inside any list: the symbol is a top-level item.
            self.feed_next(self.sym)
          else:
            self.build.append(self.sym)
          self.sym = ''
        if c in self.comment_begin:
          # comment
          self.incomment = True
        elif c in self.quote:
          # quote
          self.inquote = not self.inquote
        elif c in self.paren_begin:
          # beginning a new list; remember the enclosing one (None at top level).
          self.build_stack.append(self.build)
          empty = []
          if self.build is not None:
            # nest the new list at the end of the current list.
            self.build.append(empty)
          self.build = empty
        elif c in self.paren_end:
          # terminating the current list
          if self.build is None:
            # there must be a working list.
            self.illegal_close_paren(i)
          else:
            if len(self.build_stack) == 1:
              # only the top-level None marker is left on the stack,
              # so the current list is a complete top-level item.
              self.feed_next(self.build)
            self.build = self.build_stack.pop()
    return self

  # terminate
  def terminate(self):
    """Signal end of input: flush a pending symbol or report unclosed lists.

    Returns self for chaining.
    """
    # a working list should not exist.
    if self.build is not None:
      # error - still try to construct a partial structure.
      if self.sym:
        self.build.append(self.sym)
        self.sym = ''
      # one stack entry per unclosed parenthesis; take the count *before*
      # the stack is cleared so the report is not always zero.
      depth = len(self.build_stack)
      if depth == 1:
        x = self.build
      else:
        # build_stack[0] is the top-level None marker,
        # build_stack[1] is the outermost unfinished list.
        x = self.build_stack[1]
      # clear the state first so the reader is reusable even if the
      # handler below raises.
      self.build = None
      self.build_stack = []
      self.premature_eof(depth, x)
    elif self.sym:
      # flush the current working symbol.
      self.feed_next(self.sym)
    self.sym = ''
    return self

  # closing.
  def close(self):
    """Flush pending input, then close the filter chain."""
    # NOTE(review): flush before closing so that the next filter does not
    # receive feed() after close(); this assumes AbstractFilter.close()
    # closes the next filter - confirm against abstfilter.
    try:
      self.terminate()
    finally:
      # still close the chain when terminate() raises (strict readers).
      AbstractFilter.close(self)
    return


##  StrictSExprReader
##
class SExprIllegalClosingParenError(ValueError):
  """Raised by StrictSExprReader for a close parenthesis with no open list.

  The single argument is the offset passed to illegal_close_paren().
  """
class SExprPrematureEOFError(ValueError):
  """Raised by StrictSExprReader when input ends inside an unclosed list.

  The arguments are those passed to premature_eof(): a count and the
  partially built list.
  """
class StrictSExprReader(SExprReader):
  """SExprReader that raises on ill-structured input instead of printing."""

  def illegal_close_paren(self, i):
    # a close parenthesis was seen at offset i while no list was open.
    error = SExprIllegalClosingParenError(i)
    raise error

  def premature_eof(self, i, x):
    # input ended while lists were still open; x is the partial structure.
    error = SExprPrematureEOFError(i, x)
    raise error
  

##  str2sexpr
##
class _SExprStrConverter(AbstractConsumer):
  """Consumer that collects every item fed to it into a class-level list."""

  # shared by all instances; str2sexpr() and str2sexpr_strict() rebind it
  # to a fresh list before each parse.
  results = []

  def feed(self, s):
    # look the list up on the class so that the one most recently bound
    # there is the one that gets filled.
    sink = _SExprStrConverter.results
    sink.append(s)
    return
# module-level readers reused by str2sexpr() and str2sexpr_strict().
# both consumers append to the class attribute _SExprStrConverter.results,
# so the two readers share a single output list.
_str_converter = SExprReader(_SExprStrConverter())
_str_converter_strict = StrictSExprReader(_SExprStrConverter())

def str2sexpr(s):
  """Parse the string s and return the list of its top-level items.

  Uses the shared lenient reader, which prints a message on malformed
  input and keeps going.
  """
  parsed = []
  # the consumer appends to this class attribute, so point it at a fresh list.
  _SExprStrConverter.results = parsed
  _str_converter.reset()
  _str_converter.feed(s)
  _str_converter.terminate()
  return parsed
def str2sexpr_strict(s):
  """Parse the string s and return the list of its top-level items.

  Uses the shared strict reader, so SExprIllegalClosingParenError or
  SExprPrematureEOFError propagates on ill-structured input.
  """
  parsed = []
  # the consumer appends to this class attribute, so point it at a fresh list.
  _SExprStrConverter.results = parsed
  _str_converter_strict.reset()
  _str_converter_strict.feed(s)
  _str_converter_strict.terminate()
  return parsed


##  sexpr2str
##
def sexpr2str(e):
  """Render a sexpr in Lisp-like notation.

  A list becomes its rendered elements joined by single spaces inside
  parentheses; anything that is not a list is returned unchanged.
  """
  if isinstance(e, list):
    rendered = [sexpr2str(item) for item in e]
    return "(" + " ".join(rendered) + ")"
  return e


# test stuff
def test():
  """Self-test: nesting and comments, quoting and escapes, truncated input."""
  # comments are skipped and nested / empty lists are preserved.
  nested_expected = [["this", "is", ["a", "test", ["sentences"], ["des", []], ["yo"]]]]
  assert str2sexpr("(this ;comment\n is (a test (sentences) (des()) (yo)))") == nested_expected
  # escapes and quotes both allow special characters inside a symbol.
  quoted_expected = [['paren()theses_in#symbol', 'space in \nsymbol', 'this way also.', 'escape is "better than" quote']]
  assert str2sexpr('''(paren\\(\\)theses_in\\#symbol "space in \nsymbol"
                   this\\ way\\ also. "escape is \\"better than\\" quote")''') == quoted_expected
  # unterminated input: the lenient reader reports it and returns normally.
  str2sexpr("(this (is (a (parial (sentence")
  return


# main
if __name__ == "__main__":
  # run the built-in self-test when executed as a script.
  test()
