;ò
ñŒü?c           @   s°   d  Z  d k Z d k Z d g Z d a d „  Z d f  d „  ƒ  YZ d f  d „  ƒ  YZ d f  d	 „  ƒ  YZ d
 e i	 f d „  ƒ  YZ
 d „  Z d „  Z e d j o e ƒ  n d S(   s<   robotparser.py

    Copyright (C) 2000  Bastian Kleineidam

    You can choose between two licenses when using this package:
    1) GNU GPLv2
    2) PSF license for Python 2.2

    The robots.txt Exclusion Protocol is implemented as specified in
    http://info.webcrawler.com/mak/projects/robots/norobots-rfc.html
Ns   RobotFileParseri    c         C   s   t  o	 |  GHn d  S(   N(   s   debugs   msg(   s   msg(    (    s(   /mit/python/lib/python2.3/robotparser.pys   _debug   s     c           B   sb   t  Z d  Z d d „ Z d „  Z d „  Z d „  Z d „  Z d „  Z d „  Z	 d	 „  Z
 d
 „  Z RS(   ss    This class provides a set of methods to read, parse and answer
    questions about a single robots.txt file.

    s    c         C   s>   g  |  _ t |  _ d |  _ d |  _ |  i | ƒ d |  _ d  S(   Ni    (	   s   selfs   entriess   Nones   default_entrys   disallow_alls	   allow_alls   set_urls   urls   last_checked(   s   selfs   url(    (    s(   /mit/python/lib/python2.3/robotparser.pys   __init__   s    				c         C   s   |  i Sd S(   s·   Returns the time the robots.txt file was last fetched.

        This is useful for long-running web spiders that need to
        check for new robots.txt files periodically.

        N(   s   selfs   last_checked(   s   self(    (    s(   /mit/python/lib/python2.3/robotparser.pys   mtime$   s     c         C   s   d k  } | i  ƒ  |  _ d S(   sY   Sets the time the robots.txt file was last fetched to the
        current time.

        N(   s   times   selfs   last_checked(   s   selfs   time(    (    s(   /mit/python/lib/python2.3/robotparser.pys   modified-   s     	c         C   s/   | |  _  t i | ƒ d d !\ |  _ |  _ d S(   s,   Sets the URL referring to a robots.txt file.i   i   N(   s   urls   selfs   urlparses   hosts   path(   s   selfs   url(    (    s(   /mit/python/lib/python2.3/robotparser.pys   set_url5   s     	c         C   sû   t  ƒ  } | i |  i ƒ } g  } | i ƒ  } x+ | o# | i	 | i
 ƒ  ƒ | i ƒ  } q0 W| i |  _ |  i d j p |  i d j o d |  _ t d ƒ nZ |  i d j o d |  _ t d ƒ n3 |  i d j o | o t d ƒ |  i | ƒ n d	 S(
   s4   Reads the robots.txt URL and feeds it to the parser.i‘  i“  i   s   disallow alli  s	   allow alliÈ   s   parse linesN(   s	   URLopeners   openers   opens   selfs   urls   fs   liness   readlines   lines   appends   strips   errcodes   disallow_alls   _debugs	   allow_alls   parse(   s   selfs   fs   liness   openers   line(    (    s(   /mit/python/lib/python2.3/robotparser.pys   read:   s&     	  		
c         C   s1   d | i j o | |  _ n |  i i | ƒ d  S(   Ns   *(   s   entrys
   useragentss   selfs   default_entrys   entriess   append(   s   selfs   entry(    (    s(   /mit/python/lib/python2.3/robotparser.pys
   _add_entryN   s    c         C   s­  d } d } t ƒ  } x\| D]T} | d } | o_ | d j o! t d | ƒ t ƒ  } d } q“ | d j o  |  i | ƒ t ƒ  } d } q“ n | i	 d ƒ } | d j o | |  } n | i ƒ  } | o q n | i d d ƒ } t | ƒ d j o_| d i ƒ  i ƒ  | d <t i | d i ƒ  ƒ | d <| d d j oS | d j o( t d | ƒ |  i | ƒ t ƒ  } n | i i | d ƒ d } qp| d d	 j oF | d j o t d
 | ƒ qX| i i t | d d ƒ ƒ d } qp| d d j o@ | d j o t d
 | ƒ qX| i i t | d d ƒ ƒ qpt d | | d f ƒ q t d | | f ƒ q W| d j o |  i i | ƒ n t d t |  ƒ ƒ d S(   s   parse the input lines from a robot.txt file.
           We allow that a user-agent: line is not preceded by
           one or more blank lines.i    i   s]   line %d: warning: you should insert allow: or disallow: directives below any user-agent: linei   s   #s   :s
   user-agentsP   line %d: warning: you should insert a blank line before any user-agent directives   disallowsH   line %d: error: you must insert a user-agent: directive before this lines   allows    line %d: warning: unknown key %ss!   line %d: error: malformed line %ss   Parsed rules:
%sN(   s   states
   linenumbers   Entrys   entrys   liness   lines   _debugs   selfs
   _add_entrys   finds   is   strips   splits   lens   lowers   urllibs   unquotes
   useragentss   appends	   ruleliness   RuleLines   entriess   str(   s   selfs   liness   is
   linenumbers   states   entrys   line(    (    s(   /mit/python/lib/python2.3/robotparser.pys   parseU   s^     	 
	
	

!c         C   s¿   t  d | | f ƒ |  i o t Sn |  i o t Sn t i	 t
 i
 t i | ƒ ƒ d ƒ p d } x2 |  i D]' } | i | ƒ o | i | ƒ Sqn qn W|  i o |  i i | ƒ Sn t Sd S(   s=   using the parsed robots.txt decide if useragent can fetch urls<   Checking robot.txt allowance for:
  user agent: %s
  url: %si   s   /N(   s   _debugs	   useragents   urls   selfs   disallow_alls   Falses	   allow_alls   Trues   urllibs   quotes   urlparses   unquotes   entriess   entrys
   applies_tos	   allowances   default_entry(   s   selfs	   useragents   urls   entry(    (    s(   /mit/python/lib/python2.3/robotparser.pys	   can_fetch•   s     

,
 
c         C   s6   d } x% |  i D] } | t | ƒ d } q W| Sd  S(   Ns    s   
(   s   rets   selfs   entriess   entrys   str(   s   selfs   entrys   ret(    (    s(   /mit/python/lib/python2.3/robotparser.pys   __str__ª   s
    
 (   s   __name__s
   __module__s   __doc__s   __init__s   mtimes   modifieds   set_urls   reads
   _add_entrys   parses	   can_fetchs   __str__(    (    (    s(   /mit/python/lib/python2.3/robotparser.pys   RobotFileParser   s    							@	s   RuleLinec           B   s)   t  Z d  Z d „  Z d „  Z d „  Z RS(   sh   A rule line is a single "Allow:" (allowance==1) or "Disallow:"
       (allowance==0) followed by a path.c         C   s>   | d j o | o
 d } n t i | ƒ |  _  | |  _ d  S(   Ns    i   (   s   paths	   allowances   urllibs   quotes   self(   s   selfs   paths	   allowance(    (    s(   /mit/python/lib/python2.3/robotparser.pys   __init__´   s    
c         C   s$   |  i d j p | i |  i ƒ Sd  S(   Ns   *(   s   selfs   paths   filenames
   startswith(   s   selfs   filename(    (    s(   /mit/python/lib/python2.3/robotparser.pys
   applies_to»   s    c         C   s$   |  i o d p d d |  i Sd  S(   Ns   Allows   Disallows   : (   s   selfs	   allowances   path(   s   self(    (    s(   /mit/python/lib/python2.3/robotparser.pys   __str__¾   s    (   s   __name__s
   __module__s   __doc__s   __init__s
   applies_tos   __str__(    (    (    s(   /mit/python/lib/python2.3/robotparser.pys   RuleLine±   s    		s   Entryc           B   s2   t  Z d  Z d „  Z d „  Z d „  Z d „  Z RS(   s?   An entry has one or more user-agents and zero or more rulelinesc         C   s   g  |  _ g  |  _ d  S(   N(   s   selfs
   useragentss	   rulelines(   s   self(    (    s(   /mit/python/lib/python2.3/robotparser.pys   __init__Ä   s    	c         C   s\   d } x# |  i D] } | d | d } q Wx% |  i D] } | t | ƒ d } q6 W| Sd  S(   Ns    s   User-agent: s   
(   s   rets   selfs
   useragentss   agents	   ruleliness   lines   str(   s   selfs   lines   agents   ret(    (    s(   /mit/python/lib/python2.3/robotparser.pys   __str__È   s    
 
 c         C   st   | i d ƒ d i ƒ  } xP |  i D]E } | d j o t Sn | i ƒ  } | i | ƒ d j o t Sq# q# Wt Sd S(   s2   check if this entry applies to the specified agents   /i    s   *iÿÿÿÿN(	   s	   useragents   splits   lowers   selfs
   useragentss   agents   Trues   finds   False(   s   selfs	   useragents   agent(    (    s(   /mit/python/lib/python2.3/robotparser.pys
   applies_toÐ   s     
 c         C   sS   xH |  i D]= } t | t | ƒ | i f ƒ | i | ƒ o | i Sq
 q
 Wd Sd S(   sZ   Preconditions:
        - our agent applies to this entry
        - filename is URL decodedi   N(   s   selfs	   ruleliness   lines   _debugs   filenames   strs	   allowances
   applies_to(   s   selfs   filenames   line(    (    s(   /mit/python/lib/python2.3/robotparser.pys	   allowanceÝ   s     
 (   s   __name__s
   __module__s   __doc__s   __init__s   __str__s
   applies_tos	   allowance(    (    (    s(   /mit/python/lib/python2.3/robotparser.pys   EntryÂ   s
    			s	   URLopenerc           B   s   t  Z d „  Z d „  Z RS(   Nc         G   s    t  i i |  | Œ d |  _ d  S(   NiÈ   (   s   urllibs   FancyURLopeners   __init__s   selfs   argss   errcode(   s   selfs   args(    (    s(   /mit/python/lib/python2.3/robotparser.pys   __init__è   s    c         C   s,   | |  _  t i i |  | | | | | ƒ Sd  S(   N(	   s   errcodes   selfs   urllibs   FancyURLopeners   http_error_defaults   urls   fps   errmsgs   headers(   s   selfs   urls   fps   errcodes   errmsgs   headers(    (    s(   /mit/python/lib/python2.3/robotparser.pys   http_error_defaultì   s    	(   s   __name__s
   __module__s   __init__s   http_error_default(    (    (    s(   /mit/python/lib/python2.3/robotparser.pys	   URLopenerç   s   	c         C   s<   | o
 d } n d } |  | j o	 d GHn
 d | GHHd  S(   Ns   access denieds   access alloweds   faileds   ok (%s)(   s   bs   acs   a(   s   as   bs   ac(    (    s(   /mit/python/lib/python2.3/robotparser.pys   _checkñ   s    
		c          C   s†  t  ƒ  }  d a |  i d ƒ |  i ƒ  t |  i d d ƒ d ƒ t |  i d d ƒ d ƒ t |  i d d ƒ d ƒ t |  i d	 d ƒ d ƒ t |  i d
 d ƒ d ƒ t |  i d d ƒ d ƒ t |  i d d ƒ d ƒ t |  i d d ƒ d ƒ t |  i d d ƒ d ƒ t |  i d d ƒ d ƒ t |  i d d ƒ d ƒ t |  i d d ƒ d ƒ |  i d ƒ |  i ƒ  t |  i d d ƒ d ƒ d  S(   Ni   s"   http://www.musi-cal.com/robots.txts   *s   http://www.musi-cal.com/s    i    s   CherryPickerSEs?   http://www.musi-cal.com/cgi-bin/event-search?city=San+Franciscos   CherryPickerSE/1.0s   CherryPickerSE/1.5s   ExtractorPros   http://www.musi-cal.com/blubbas   extractorpros   toolpak/1.1s   spams   http://www.musi-cal.com/searchs#   http://www.musi-cal.com/Musician/mes   http://www.lycos.com/robots.txts   Mozillas   http://www.lycos.com/search(   s   RobotFileParsers   rps   debugs   set_urls   reads   _checks	   can_fetch(   s   rp(    (    s(   /mit/python/lib/python2.3/robotparser.pys   _testü   s4     	

s   __main__(   s   __doc__s   urlparses   urllibs   __all__s   debugs   _debugs   RobotFileParsers   RuleLines   Entrys   FancyURLopeners	   URLopeners   _checks   _tests   __name__(
   s	   URLopeners   __all__s   _debugs   _tests   urllibs   urlparses   _checks   RuleLines   Entrys   RobotFileParser(    (    s(   /mit/python/lib/python2.3/robotparser.pys   ?   s   		›%
		'