import collections
import re
import urllib

import gdata.service

import youtomb.db

class Explorer(object):
    """Registers discovered videos ("artifacts") in the youtomb database.

    A small in-memory cache of ``(site_name, ext_id)`` pairs is kept so that
    an artifact registered recently is not looked up and inserted again.
    """

    def __init__(self):
        # Recently registered (site_name, ext_id) pairs.
        self.cache = set()
        # Insertion order of the cached pairs, used for FIFO eviction.
        self._cache_order = collections.deque()
        # Maximum number of pairs kept in the cache.
        self.cache_size = 100
        # Site name -> compiled pattern; group 1 is the external video id.
        self.url_re = {
            'YouTube': re.compile(
                r"http://(?:[^.]+\.)?youtube\.com/(?:watch\?v=|v/)"
                r"([A-Za-z0-9_-]+)(?:[^A-Za-z0-9_-].*)?$"),
        }

    def set_cache(self, site_name, ext_id):
        """Remember ``(site_name, ext_id)``, evicting the oldest entries.

        Entries are evicted first-in-first-out once the cache holds more than
        ``cache_size`` pairs.  Re-adding a pair that is already cached does
        not change its position.
        """
        key = (site_name, ext_id)
        if key in self.cache:
            return
        self.cache.add(key)
        self._cache_order.append(key)
        while self.cache and len(self.cache) > self.cache_size:
            if self._cache_order:
                # discard(): the key may have been removed from the set
                # directly by other code, leaving a stale order entry.
                self.cache.discard(self._cache_order.popleft())
            else:
                # The set was filled behind our back; no order is known.
                self.cache.pop()

    def gdata_request(self, url, params=(), server="gdata.youtube.com"):
        """Issue a GData GET request and return the service's response.

        ``url`` is a %-format template; each item of ``params`` is URL-quoted
        with ``urllib.quote_plus`` and substituted into it positionally.
        ``server`` is passed to ``gdata.service.GDataService``.
        """
        service = gdata.service.GDataService(server=server)
        return service.Get(url % tuple(urllib.quote_plus(p) for p in params))

    def register_artifact(self,
                          site_name,
                          ext_id,
                          discover_source,
                          user=None,
                          pub=None):
        """Insert an artifact row for a video unless it is already cached.

        If either ``user`` or ``pub`` is missing, both are replaced by the
        result of ``get_user_pub(ext_id)`` (which may be ``(None, None)``
        when that lookup fails).  The row is written with ``insert ignore``,
        a summary line is printed, and the pair is added to the cache.
        """
        if (site_name, ext_id) in self.cache:
            return

        # youtube specific
        if user is None or pub is None:
            (user, pub) = self.get_user_pub(ext_id)

        db = youtomb.db.Database()
        db.execute('insert ignore into artifacts set'
                   ' site_id=%s, external_id=%s,'
                   ' discoverer=%s, active="active", user=%s,'
                   ' time_published=%s',
                   [db.site_id_for_name(site_name), ext_id, discover_source,
                    user, pub])

        # Single-argument print() keeps the output identical on Python 2
        # while remaining valid syntax on Python 3.
        print('%s video "%s" by %s from %s %s'
              % (site_name, ext_id, user, discover_source, pub))
        self.set_cache(site_name, ext_id)

    def match_url(self, url):
        """Return ``(site_name, ext_id)`` for a recognised URL, else None.

        The id must consist of the characters ``A-Za-z0-9_-``; a URL whose
        id part is empty or starts with another character does not match.
        """
        for site, regexp in self.url_re.items():
            m = regexp.match(url)
            if m:
                return (site, m.group(1))
        return None

    # youtube specific
    def get_user_pub(self, ext_id):
        """Return ``(author_name, published)`` for a YouTube video id.

        Both values are the ``.text`` of the corresponding feed elements
        (first author only).  Returns ``(None, None)`` when the request
        raises ``gdata.service.RequestError``.
        """
        try:
            data = self.gdata_request("http://gdata.youtube.com/feeds/api/"
                                      "videos/%s", (ext_id,))
        except gdata.service.RequestError:
            return (None, None)
        return (data.author[0].name.text, data.published.text)
