import os, re, RDF
import subprocess
from proxy import http
import urllib2

# Small type/shape predicates shared by the rest of the module.
#
# Parsed query terms ("spo" terms) are 2-tuples of the form (kind, value)
# where kind is one of 'uri', 'variable' or 'string'.

def is_list(x):
    """Return True only when x is exactly a ``list`` (subclasses excluded)."""
    return type(x) is list


def is_rdf_uri(x):
    """Return True only when x is exactly an ``RDF.Uri`` instance."""
    return type(x) is RDF.Uri


def is_statement(x):
    """Return True only when x is exactly an ``RDF.Statement`` instance."""
    return type(x) is RDF.Statement


def is_string(x):
    """Return True when x is a byte or unicode string."""
    return isinstance(x, basestring)


def is_dict(x):
    """Return True when x supports item access (has ``__getitem__``).

    This is a duck-typed check rather than ``isinstance(x, dict)``, so it is
    also true for lists, tuples and strings.
    """
    return hasattr(x, '__getitem__')


def _is_spo(x, kind):
    """Return True when x is a 2-tuple whose first element equals kind."""
    return type(x) is tuple and len(x) == 2 and x[0] == kind


def is_spo_uri(x):
    """Return True when x is a ('uri', value) term."""
    return _is_spo(x, 'uri')


def is_spo_var(x):
    """Return True when x is a ('variable', name) term."""
    return _is_spo(x, 'variable')


def is_spo_str(x):
    """Return True when x is a ('string', value) term."""
    return _is_spo(x, 'string')


def has_var(x):
    """Return True when at least one term in the iterable x is a variable.

    Stops at the first variable found instead of building the whole filtered
    sequence, and therefore also works on iterators.
    """
    return any(is_spo_var(term) for term in x)


def spo_vars(x):
    """Return the list of variable names among the terms in x, in order."""
    return [term[1] for term in x if is_spo_var(term)]
def spo_node(x):
    """Convert one parsed (kind, value) term into a value for RDF.Statement.

    - ('uri', value)      -> ``RDF.Uri(value)``
    - ('variable', name)  -> ``None``
    - ('string', value)   -> the bare value
    - any other 2-element input is returned unchanged
    - input whose length is not 2 yields ``None``
    """
    if len(x) != 2:
        return None
    if is_spo_uri(x):
        return RDF.Uri(x[1])
    if is_spo_var(x):
        return None
    if is_spo_str(x):
        return x[1]
    return x
def spo_stmt(x):
    """Build an ``RDF.Statement`` from a parsed (subject, predicate, object) triple.

    Each term is converted with ``spo_node``.  Returns ``False`` when x does
    not have exactly three elements.
    """
    if len(x) != 3:
        return False
    subject_term = spo_node(x[0])
    predicate_term = spo_node(x[1])
    object_term = spo_node(x[2])
    return RDF.Statement(subject=subject_term,
                         predicate=predicate_term,
                         object=object_term)

# String helpers for "symbols": URIs that may be wrapped in angle brackets
# ('<http://example.org/x#frag>') and may carry a '#fragment'.

def isBlank(x):
    """Return True when x is a string of the form '_:name' (more than 2 chars)."""
    return isinstance(x, basestring) and len(x) > 2 and x[0:2] == u'_:'


def isSymbol(x):
    """Return True when x is a string wrapped in angle brackets, e.g. '<a>'."""
    return isinstance(x, basestring) and len(x) > 1 and x[0] == u'<' and x[-1] == u'>'


def Symbol(x):
    """Return the unicode form of x wrapped in angle brackets."""
    return u'<' + unicode(x) + u'>'


def unwrap(x):
    """Strip the surrounding angle brackets from a symbol; return other input as is.

    An explicit branch is used (rather than ``cond and a or b``) so that the
    empty symbol '<>' correctly unwraps to ''.
    """
    if isSymbol(x):
        return x[1:-1]
    return x


def unfrag(x):
    """Return x with its '#fragment' removed.

    If x started with '<', the closing '>' cut off together with the fragment
    is put back.  Strings without '#' are returned unchanged.  A string that
    consists only of a fragment ('#frag') yields ''.
    """
    pos = x.find('#')
    if pos == -1:
        return x
    base = x[:pos]
    if x.startswith('<'):
        base += '>'
    return base


def frag(x):
    """Return the '#fragment' part of x (including the '#'), or '' if none.

    A trailing '>' is not considered part of the fragment.  Empty input
    yields '' instead of raising.
    """
    pos = x.find('#')
    if pos == -1:
        return ''
    end = len(x)
    if x.endswith('>'):
        end -= 1
    return x[pos:end]


def cpfrag(x, y):
    """Return y with its fragment replaced by the fragment of x.

    Angle brackets around y are preserved.  An empty y is accepted.
    """
    base = unfrag(y)
    fragment = frag(x)
    if base.endswith('>'):
        return base[:-1] + fragment + '>'
    return base + fragment

# Namespace helpers: each builds an RDF.Uri for a local name inside one
# fixed vocabulary.

def _RDFS(x):
    """Return the RDF.Uri for term x in the 22-rdf-syntax-ns vocabulary."""
    return RDF.Uri('http://www.w3.org/1999/02/22-rdf-syntax-ns#%s' % x)


def _ACLS(x):
    """Return the RDF.Uri for term x in the ACL vocabulary."""
    return RDF.Uri('http://www.w3.org/2001/02/acls/ns#%s' % x)


def _Q(x):
    """Return the RDF.Uri for term x in the queries vocabulary."""
    return RDF.Uri('http://presbrey.servers.csail.mit.edu/ns/queries#%s' % x)


# Node marking the end of an RDF collection (first/rest list).
NIL = RDF.Node(_RDFS('nil'))

class Model(object):
    """Wrapper around an ``RDF.Model`` and the ``RDF.Storage`` backing it.

    Attributes and iteration that are not defined here are delegated to the
    wrapped ``RDF.Model`` instance (``self._model``).
    """

    def __init__(self, name, new=True, storage_name='sqlite'):
        """Create the storage and the model on top of it.

        name         -- storage name passed to ``RDF.Storage``
        new          -- when true the storage is opened with ``new='yes'``,
                        otherwise with ``new='no'``
        storage_name -- storage backend name passed to ``RDF.Storage``

        Raises RuntimeError if either object comes back as None.
        """
        self._storage = RDF.Storage(name=name,
                                    storage_name=storage_name,
                                    options_string=("new='%s'" % (new and 'yes' or 'no')))
        if self._storage is None:
            raise RuntimeError('new RDF.Storage failed')
        self._model = RDF.Model(self._storage)
        if self._model is None:
            raise RuntimeError('new RDF.Model failed')

    def add_uri(self, uri):
        """Fetch uri and add every statement parsed from it."""
        return self.add_matching(uri)

    def add_uri_safe(self, uri):
        """Fetch uri and add only statements whose subject belongs to it.

        A statement is kept when its subject is a resource whose URI, with
        the fragment removed, equals uri with the fragment removed.
        """
        return self.add_matching(uri,
                                 lambda x: x.subject.is_resource() and \
                                 unfrag(unicode(x.subject.uri)) == unfrag(unicode(uri)))

    def add_matching(self, uri, callback=None):
        """Fetch uri, parse it and add the statements accepted by callback.

        uri      -- ``RDF.Uri`` or anything ``RDF.Uri`` can be built from
        callback -- optional predicate taking a statement; statements for
                    which it returns a false value are skipped.  With no
                    callback every statement is added.

        Statements are added with a context node built from uri.  Redland
        errors raised while parsing/adding are deliberately ignored, so a
        malformed document leaves the model partially populated.

        Raises TypeError when callback is neither None nor callable, and
        RuntimeError when the parser cannot be created.
        """
        # Validate before doing any network I/O.
        if callback is not None and not callable(callback):
            raise TypeError('add_matching: Invalid callback type')
        if not is_rdf_uri(uri):
            uri = RDF.Uri(uri)
        parser = RDF.Parser('raptor')
        if parser is None:
            raise RuntimeError('new RDF.Parser(raptor) failed')
        context = RDF.Node(uri)
        rdftxt = http(str(uri)).read()
        try:
            if callback is None:
                self._model.add_statements(parser.parse_string_as_stream(rdftxt, uri), context)
            else:
                for x in parser.parse_string_as_stream(rdftxt, uri):
                    if callback(x):
                        self._model.add_statement(x, context)
        except RDF.RedlandError:
            # Redland parse error: best effort, keep whatever was added.
            pass

    def _blank_collection(self, blankNode):
        """Yield the members of the first/rest collection starting at blankNode.

        Walks the chain until a false node or NIL is reached.
        """
        first, rest = _RDFS('first'), _RDFS('rest')
        while blankNode and blankNode != NIL:
            yield self.get_target(blankNode, first)
            blankNode = self.get_target(blankNode, rest)

    def unwrap(self, node):
        """Convert an ``RDF.Node`` into a plain value.

        - resource node -> its URI
        - literal node  -> its string value
        - blank node    -> list of the unwrapped members of the collection
                           rooted at that node
        - anything that is not exactly an ``RDF.Node`` -> None

        Raises ValueError for a node of none of the above kinds.
        """
        if not type(node) is RDF.Node:
            return None
        elif node.is_resource():
            return node.uri
        elif node.is_literal():
            return node.literal_value['string']
        elif node.is_blank():
            return [self.unwrap(member)
                    for member in self._blank_collection(node)]
        else:
            raise ValueError('unwrap: Invalid RDF.Node type')

    def __getattr__(self, k):
        """Delegate unknown attributes to the wrapped model.

        ``__getattr__`` is only consulted when normal lookup fails, so a
        lookup of ``_model`` arriving here means the model was never set
        (e.g. ``__init__`` did not complete).  Raising AttributeError in that
        case avoids unbounded recursion.
        """
        if k == '_model':
            raise AttributeError(k)
        return getattr(self._model, k)

    def __iter__(self):
        """Iterate over the wrapped model."""
        return iter(self._model)

class Metadata(Model):
    """Model holding access-control metadata (ResourceAccessRule entries)."""

    def _rules(self):
        """Return every complete access rule found in the model.

        For each subject typed as ``ResourceAccessRule`` the ``hasAccessTo``,
        ``methods`` and ``allow`` targets are unwrapped.  A string ``methods``
        value is treated as a comma-separated list; each entry is stripped of
        surrounding whitespace and upper-cased (so "get, post" yields
        ['GET', 'POST']).  A string ``allow`` value is upper-cased.  Rules
        missing any of the three values are skipped.

        Returns a list of dicts with keys 'access_to', 'methods', 'allow'.
        """
        found = []
        for rule_node in self.sources(_RDFS('type'), _ACLS('ResourceAccessRule')):
            access_to = self.unwrap(self.get_target(rule_node, _ACLS('hasAccessTo')))
            methods = self.unwrap(self.get_target(rule_node, _ACLS('methods')))
            if is_string(methods):
                methods = [name.strip().upper() for name in methods.split(',')]
            allow = self.unwrap(self.get_target(rule_node, _ACLS('allow')))
            if is_string(allow):
                allow = allow.upper()
            if not access_to or not methods or not allow:
                continue
            found.append({'access_to': access_to,
                          'methods': methods,
                          'allow': allow})
        return found

    def _filter_rules(self, uri, method):
        """Return a predicate selecting rules that apply to (uri, method).

        A rule applies when its 'access_to' equals uri and its 'methods'
        contain either the upper-cased method or the wildcard '*'.
        """
        if not is_rdf_uri(uri):
            uri = RDF.Uri(uri)
        method = method.upper()

        def applies(rule):
            return uri == rule['access_to'] and \
                (method in rule['methods'] or
                 '*' in rule['methods'])
        return applies

    def rules(self, uri, method):
        """Return the 'allow' values of all rules applying to (uri, method)."""
        applies = self._filter_rules(uri, method)
        return [rule['allow'] for rule in self._rules() if applies(rule)]

class Query(Model):
    """In-memory model built around a SPARQL query described at a URI.

    The description document is loaded, the query text and its options are
    read from it, ``FROM`` clauses are extracted, and the WHERE-clause triples
    are parsed so that documents named by ``FROM ?variable`` clauses can be
    discovered and loaded iteratively (see ``resolve``).
    """

    def __init__(self, uri, env=os.environ):
        """Load the query description at uri and prepare it for execution.

        uri -- URI of the query description; also used as base URI
        env -- mapping used to expand environment references in the query
               text (see ``_expand``)

        Raises ValueError when no query text is found for uri.
        """
        Model.__init__(self, ':memory:')
        self._base_uri = str(uri)
        self._env = env
        # URIs already loaded into the model.
        self._uri = {unfrag(str(uri)):True}
        # Targets of "FROM <uri>" and names of "FROM ?var" clauses.
        self._from_uri = []
        self._from_var = []

        self.add_uri_safe(str(uri))
        self._query = self.unwrap(self.get_target(uri, _Q('query')))
        self._environ = self.unwrap(self.get_target(uri, _Q('environ')))
        self._bind = self.unwrap(self.get_target(uri, _Q('bind')))
        self._eval = self.unwrap(self.get_target(uri, _Q('eval')))
        if not is_string(self._query):
            # Fail with a clear message instead of a TypeError from ``re``.
            raise ValueError('no query text found for %s' % self._base_uri)

        # produce queries valid enough for parsing
        self._parse_query()
        # parse constraint triples from where clause
        self._constraints = list(self._iter_constraints())
        # fetch FROM <URI> resources
        self._add_uris(self._from_uri)
        self.resolver = self.var_resolver()

    def _add_uris(self, lst):
        """Load every URI in lst not loaded before; return how many were new."""
        added = 0
        for x in lst:
            if not self._uri.get(x):
                print('(%s) %s' % (self._base_uri, x))
                added += 1
                self._uri[x] = True
                self.add_uri_safe(x)
        return added

    def _expand(self, text):
        """Replace ``<environ>{NAME}`` references in text with env values.

        Only done when the query description supplied a non-empty ``environ``
        string.  References to names missing from the env mapping are left
        untouched.
        """
        # NOTE(review): self._environ is interpolated into the pattern
        # unescaped, so it is interpreted as a regular expression -- confirm
        # that this is what the stored descriptions expect.
        if is_string(self._environ) and len(self._environ):
            for x in re.findall(r'(%s{)(\w+)(})' % self._environ, text):
                if x[1] in self._env:
                    text = text.replace(''.join(x), self._env[x[1]])
        return text

    def _parse_query(self):
        """Extract FROM clauses from the query text.

        ``FROM <uri>`` targets are recorded in ``_from_uri`` and stay in the
        query; ``FROM ?var`` lines are recorded in ``_from_var`` and removed
        from the query.  The last whitespace-separated token of each line in
        the FROM section is taken as the clause target.
        """
        m = re.findall(r'(\s*FROM\s.+)+.*WHERE', self._query, re.DOTALL)
        if not m:
            return
        from_block = m[0]
        # query text on either side of the FROM section
        query = self._query.split(from_block)

        kept = []
        for line in from_block.split('\n'):
            # split() (any whitespace) so tabs, trailing blanks and '\r'
            # do not end up inside the target token
            tokens = line.split()
            if tokens:
                target = tokens[-1]
                if target[0] == '?':
                    # store FROM variable clauses and drop the line
                    self._from_var.append(target[1:])
                    continue
                # store FROM resource clauses
                self._from_uri.append(unwrap(target))
            kept.append(line)

        # save literal query
        self._query = '\n'.join(kept).join(query)

    def _parse_spo(self, line):
        """Parse one dumped triple into three (kind, value) tuples.

        line is the comma-separated term list of a triple, each term looking
        like ``kind<value>`` or ``kind(value)``.  Pieces that do not form a
        complete term on their own (a value containing ', ') are accumulated
        until they do.  Surrounding double quotes are stripped from values.

        Raises Exception when the line does not yield exactly three terms.
        """
        r = []
        pending = ''
        for piece in line.split(', '):
            candidate = pending + piece
            m = re.match(r'^(\w+)([\<\(]+)(.*)([\)\>]+)$', candidate)
            if m:
                g = m.groups()
                r.append((g[0], g[2].strip('"')))
                pending = ''
            else:
                # keep everything gathered so far, not just the last piece
                pending = candidate + ', '
        if not len(r) == 3:
            raise Exception('error parsing SPARQL query triple: %s' % str(r))
        return r

    def _iter_constraints(self):
        """Yield the WHERE-clause triples of the query.

        The expanded query is piped to the external ``roqet`` program, whose
        output is scanned for ``triple #N { triple(...) ... }`` entries.  Each
        yielded item is a list of three (kind, value) tuples with values
        passed through ``_expand``.
        """
        # generator for WHERE clause triples
        argv = ['roqet', '-f', 'noNet', '--dump-query', 'structure', '-i', 'sparql', '-', self._base_uri]
        p = subprocess.Popen(argv, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
        # communicate() feeds stdin, drains stdout and reaps the child,
        # avoiding pipe deadlocks and leftover zombie processes.
        o, _ = p.communicate(self._expand(self._query))
        for t_triple in re.findall(r'triple #\d+ { triple\((.+)\) .*}', o):
            spo = self._parse_spo(t_triple)
            yield [(kind, self._expand(value)) for kind, value in spo]

    def var_constraints(self, variable=None):
        """Return the constraint triples that mention a variable.

        With variable None, triples mentioning any ``FROM ?var`` variable are
        returned; otherwise triples mentioning the named variable.
        """
        if variable is None:
            return [c for c in self._constraints
                    if any(v in spo_vars(c) for v in self._from_var)]
        return [c for c in self._constraints if variable in spo_vars(c)]

    def var_resolver(self, variable=None):
        """Return a function that lists URIs currently bound to FROM variables.

        The returned callable matches each relevant constraint against the
        model and, for every position (subject, predicate, object) whose term
        value names a FROM variable, collects the URI of the matching node
        when that node is a resource.
        """
        var_c = self.var_constraints(variable)
        var_d = [spo_stmt(c) for c in var_c]
        def resolver():
            r = []
            for x, y in zip(var_c, var_d):
                for z in self.find_statements(y):
                    # each position is checked against its own node
                    for term, node in zip(x, (z.subject, z.predicate, z.object)):
                        if term[1] in self._from_var and node.is_resource():
                            r.append(str(node.uri))
            return r
        return resolver

    def resolve(self):
        """Load documents for FROM variables until no new URI turns up."""
        n = 1
        while n > 0:
            r = self.resolver()
            n = self._add_uris(r)

    def query(self):
        """Return the query text with environment references expanded."""
        return self._expand(self._query)

    def execute(self):
        """Run the expanded query as SPARQL against this model."""
        return RDF.Query(str(self.query()),
                         query_language='sparql',
                         base_uri=RDF.Uri(self._base_uri)).execute(self._model)

if __name__ == '__main__':
    # Demo: load two metadata documents, look up the rules applying to a GET
    # of the friends/ resource, and run every query those rules point to.
    metadata = [RDF.Uri(location) for location in
                ('http://presbrey.xvm.mit.edu/.metadata.rdf',
                 'http://presbrey.xvm.mit.edu/friends/.metadata.rdf')]
    m = Metadata(':memory:')
    for source in metadata:
        m.add_uri(source)
    for x in m.rules('http://presbrey.xvm.mit.edu/friends/', 'get'):
        # a rule may allow a single URI or a collection of values
        if is_rdf_uri(x):
            x = [x]
        for y in x:
            if is_rdf_uri(y):
                print('Running query: %s' % (y,))
                q = Query(y)
                q.resolve()
                print(q.execute())
