;
ะโ"Ic               @   s[   d  Z  d d l Z d d l Z d g Z Gd   d  Z Gd   d  Z Gd   d  Z d S(	   u<   robotparser.py

    Copyright (C) 2000  Bastian Kleineidam

    You can choose between two licenses when using this package:
    1) GNU GPLv2
    2) PSF license for Python 2.2

    The robots.txt Exclusion Protocol is implemented as specified in
    http://info.webcrawler.com/mak/projects/robots/norobots-rfc.html
i    Nu   RobotFileParserc             B   sh   |  Ee  Z d  Z d d  Z d   Z d   Z d   Z d   Z d   Z d   Z	 d	   Z
 d
   Z d S(   us    This class provides a set of methods to read, parse and answer
    questions about a single robots.txt file.

    u    c             C   s>   g  |  _  d  |  _ d |  _ d |  _ |  j |  d |  _ d  S(   Ni    F(   u   entriesu   Noneu   default_entryu   Falseu   disallow_allu	   allow_allu   set_urlu   last_checked(   u   selfu   url(    (    u/   /mit/python/lib/python3.0/urllib/robotparser.pyu   __init__   s    				c             C   s   |  j  S(   uท   Returns the time the robots.txt file was last fetched.

        This is useful for long-running web spiders that need to
        check for new robots.txt files periodically.

        (   u   last_checked(   u   self(    (    u/   /mit/python/lib/python3.0/urllib/robotparser.pyu   mtime   s    c             C   s   d d l  } | j    |  _ d S(   uY   Sets the time the robots.txt file was last fetched to the
        current time.

        i    N(   u   timeu   last_checked(   u   selfu   time(    (    u/   /mit/python/lib/python3.0/urllib/robotparser.pyu   modified(   s    c             C   s5   | |  _  t j j |  d d  \ |  _ |  _ d S(   u,   Sets the URL referring to a robots.txt file.i   i   N(   u   urlu   urllibu   parseu   urlparseu   hostu   path(   u   selfu   url(    (    u/   /mit/python/lib/python3.0/urllib/robotparser.pyu   set_url0   s    	c             C   sฏ   y t  j j |  j  } Wng t  j j k
 oU } z? | j d k o d |  _ n | j d k o d |  _	 n WYd d } ~ Xn* X| j
   } |  j | j d  j    d S(   u4   Reads the robots.txt URL and feeds it to the parser.i  i  i  Nu   utf-8(   i  i  T(   u   urllibu   requestu   urlopenu   urlu   erroru	   HTTPErroru   codeu   Trueu   disallow_allu	   allow_allu   readu   parseu   decodeu
   splitlines(   u   selfu   fu   erru   raw(    (    u/   /mit/python/lib/python3.0/urllib/robotparser.pyu   read5   s    !c             C   s1   d | j  k o | |  _ n |  j j |  d  S(   Nu   *(   u
   useragentsu   default_entryu   entriesu   append(   u   selfu   entry(    (    u/   /mit/python/lib/python3.0/urllib/robotparser.pyu
   _add_entryB   s    c             C   s2  d } t    } x๛| D]๓} | pQ | d k o t    } d } qt | d k o  |  j |  t    } d } qt n | j d  } | d k o | d |  } n | j   } | p q n | j d d  } t |  d k o&| d j   j   | d <t j j	 | d j    | d <| d d k oE | d k o |  j |  t    } n | j
 j | d  d } q	| d d k o8 | d k o' | j j t | d d
   d } qq	| d d	 k o8 | d k o' | j j t | d d   d } qq	q q W| d k o |  j j |  n d S(   u   Parse the input lines from a robots.txt file.

        We allow that a user-agent: line is not preceded by
        one or more blank lines.
        i    i   i   u   #Nu   :u
   user-agentu   disallowu   allowFT(   u   Entryu
   _add_entryu   findu   stripu   splitu   lenu   loweru   urllibu   parseu   unquoteu
   useragentsu   appendu	   rulelinesu   RuleLineu   Falseu   Trueu   entries(   u   selfu   linesu   stateu   entryu   lineu   i(    (    u/   /mit/python/lib/python3.0/urllib/robotparser.pyu   parseI   sL    
	 	
	 
c             C   sฎ   |  j  o d S|  j o d St j j t j j t j j |   d  } | p
 d } n x/ |  j	 D]$ } | j
 |  o | j |  Sqg W|  j o |  j j |  Sd S(   u=   using the parsed robots.txt decide if useragent can fetch urli   u   /FT(   u   disallow_allu   Falseu	   allow_allu   Trueu   urllibu   parseu   quoteu   urlparseu   unquoteu   entriesu
   applies_tou	   allowanceu   default_entry(   u   selfu	   useragentu   urlu   entry(    (    u/   /mit/python/lib/python3.0/urllib/robotparser.pyu	   can_fetch|   s    

	%

 
c             C   s   d j  d   |  j D  S(   Nu    c             S   s%   g  } |  ] } | t  |  d  q
 S(   u   
(   u   str(   u   .0u   _[1]u   entry(    (    u/   /mit/python/lib/python3.0/urllib/robotparser.pyu
   <listcomp>   s    (   u   joinu   entries(   u   self(    (    u/   /mit/python/lib/python3.0/urllib/robotparser.pyu   __str__   s    N(   u   __name__u
   __module__u   __doc__u   __init__u   mtimeu   modifiedu   set_urlu   readu
   _add_entryu   parseu	   can_fetchu   __str__(   u
   __locals__(    (    u/   /mit/python/lib/python3.0/urllib/robotparser.pyu   RobotFileParser   s   
							3	c             B   s/   |  Ee  Z d  Z d   Z d   Z d   Z d S(   uo   A rule line is a single "Allow:" (allowance==True) or "Disallow:"
       (allowance==False) followed by a path.c             C   sA   | d k o | o
 d } n t j j |  |  _ | |  _ d  S(   Nu    T(   u   Trueu   urllibu   parseu   quoteu   pathu	   allowance(   u   selfu   pathu	   allowance(    (    u/   /mit/python/lib/python3.0/urllib/robotparser.pyu   __init__   s    
c             C   s    |  j  d k p | j |  j   S(   Nu   *(   u   pathu
   startswith(   u   selfu   filename(    (    u/   /mit/python/lib/python3.0/urllib/robotparser.pyu
   applies_to   s    c             C   s    |  j  o d p d d |  j S(   Nu   Allowu   Disallowu   : (   u	   allowanceu   path(   u   self(    (    u/   /mit/python/lib/python3.0/urllib/robotparser.pyu   __str__ข   s    N(   u   __name__u
   __module__u   __doc__u   __init__u
   applies_tou   __str__(   u
   __locals__(    (    u/   /mit/python/lib/python3.0/urllib/robotparser.pyu   RuleLine   s   
		u   RuleLinec             B   s8   |  Ee  Z d  Z d   Z d   Z d   Z d   Z d S(   u?   An entry has one or more user-agents and zero or more rulelinesc             C   s   g  |  _  g  |  _ d  S(   N(   u
   useragentsu	   rulelines(   u   self(    (    u/   /mit/python/lib/python3.0/urllib/robotparser.pyu   __init__จ   s    	c             C   sj   g  } x' |  j  D] } | j d | d g  q Wx* |  j D] } | j t |  d g  q: Wd j |  S(   Nu   User-agent: u   
u    (   u
   useragentsu   extendu	   rulelinesu   stru   join(   u   selfu   retu   agentu   line(    (    u/   /mit/python/lib/python3.0/urllib/robotparser.pyu   __str__ฌ   s    
 
 c             C   sa   | j  d  d j   } xA |  j D]6 } | d k o d S| j   } | | k o d Sq# Wd S(   u2   check if this entry applies to the specified agentu   /i    u   *TF(   u   splitu   loweru
   useragentsu   Trueu   False(   u   selfu	   useragentu   agent(    (    u/   /mit/python/lib/python3.0/urllib/robotparser.pyu
   applies_toด   s    
 	c             C   s0   x) |  j  D] } | j |  o | j Sq
 Wd S(   uZ   Preconditions:
        - our agent applies to this entry
        - filename is URL decodedT(   u	   rulelinesu
   applies_tou	   allowanceu   True(   u   selfu   filenameu   line(    (    u/   /mit/python/lib/python3.0/urllib/robotparser.pyu	   allowanceม   s
    
 N(   u   __name__u
   __module__u   __doc__u   __init__u   __str__u
   applies_tou	   allowance(   u
   __locals__(    (    u/   /mit/python/lib/python3.0/urllib/robotparser.pyu   Entryฆ   s
   
			u   Entry(   u   __doc__u   urllib.parseu   urllibu   urllib.requestu   __all__u   RobotFileParseru   RuleLineu   Entry(    (    (    u/   /mit/python/lib/python3.0/urllib/robotparser.pyu   <module>   s
   	