import sys
import time
import urllib
import urllib2
import simplejson

import youtomb.db
from youtomb.explore.explorer import Explorer

# Value sent as the ``appkey`` query parameter on every Digg API request
# issued by DiggExplorer.run().
DIGG_API_KEY = "http://youtomb.mit.edu/"

class DiggExplorer(Explorer):
    """Explorer that polls the Digg video-stories endpoint for YouTube links.

    Each poll asks Digg for stories in the "videos" container restricted to
    the ``youtube.com`` domain.  Every story link that ``match_url``
    recognises is passed to ``register_artifact`` with
    ``discover_source="DiggExplorer"``.  ``match_url`` and
    ``register_artifact`` are not defined in this class (presumably they are
    inherited from ``Explorer``).
    """

    def __init__(self, timeout_sec=300):
        """Create the explorer.

        Arguments:
        timeout_sec -- seconds ``run`` sleeps between two successive polls.
        """
        super(DiggExplorer, self).__init__()
        self.timeout_sec = timeout_sec
        # Number of stories asked for per poll (sent as the ``count`` query
        # parameter by run()).
        self.top_number = 100

    def digg_make_request(self, endpoint, params=()):
        """Build, without sending, a ``urllib2.Request`` for a Digg endpoint.

        Arguments:
        endpoint -- path plus query string containing one ``%s`` placeholder
                    per entry of ``params``.
        params   -- values substituted into ``endpoint``; each one is
                    converted with ``str()`` and URL-quoted first.

        Returns the prepared ``urllib2.Request`` with the crawler's
        identification and content-negotiation headers set.
        """
        quoted = tuple(urllib.quote_plus(str(p)) for p in params)
        url = "http://services.digg.com" + (endpoint % quoted)
        print("Requesting %s" % url)
        request = urllib2.Request(url)
        # Identify ourselves and ask for a JSON response.
        request.add_header('User-Agent',
                           'YouTomb/1.0 DiggExplorer/$Rev: 334 $')
        request.add_header('Accept-Charset', 'ISO-8859-1,utf-8;q=0.7,*;q=0.7')
        request.add_header('Accept', 'application/json')
        request.add_header('Accept-Language', 'en-us,en;q=0.5,*;q=0.4')
        return request

    def digg_request(self, endpoint, params=()):
        """Send a request to Digg and return the decoded JSON response.

        Arguments are the same as for ``digg_make_request``.  Errors raised
        by ``urllib2.urlopen`` or by the JSON decoder propagate to the
        caller.
        """
        request = self.digg_make_request(endpoint, params)
        response = urllib2.urlopen(request)
        # run() polls forever, so close every response explicitly instead of
        # leaving the connection to be reclaimed by garbage collection.
        try:
            body = response.read()
        finally:
            response.close()
        return simplejson.loads(body)

    def run(self):
        """Poll Digg forever, registering every recognised story link.

        After each poll the ``timestamp`` value from the response (when
        truthy) is remembered and sent as ``min_date`` on the next request.
        The registrations of one poll are bracketed by
        ``begin_transaction()`` / ``commit_transaction()`` on a
        ``youtomb.db.Database`` instance.  This method never returns
        normally; it sleeps ``self.timeout_sec`` seconds between polls.
        """
        print('starting DiggExplorer: top_number %d, timeout %ss'
              % (self.top_number, self.timeout_sec))

        min_date = None

        db = youtomb.db.Database()

        while True:
            url = "/stories/container/videos?type=json&appkey=%s&count=%s&domain=%s"
            # Use the configured story count so the value logged at startup
            # is the one actually requested.
            params = (DIGG_API_KEY, self.top_number, "youtube.com")
            if min_date:
                url += "&min_date=%s"
                params += (min_date,)
            sys.stdout.flush()
            data = self.digg_request(url, params)
            print('got %d items (of %s) since %s'
                  % (data["count"], data["total"], str(min_date)))
            if data["timestamp"]:
                min_date = data["timestamp"]
            db.begin_transaction()
            for story in data["stories"]:
                match = self.match_url(story["link"])
                if match:
                    site, ext_id = match
                    self.register_artifact(site_name=site,
                                           ext_id=ext_id,
                                           discover_source="DiggExplorer")
                else:
                    print("Didn't match %s" % (story["link"]))
            db.commit_transaction()
            sys.stdout.flush()
            time.sleep(self.timeout_sec)

# Script entry point: start polling with the default settings.
if __name__ == "__main__":
    DiggExplorer().run()
