Ñò
Žà"Ic           @   s  d  Z  d d k Z d d k Z e i d ƒ Z e i d ƒ Z e i d ƒ Z e i d ƒ Z e i d ƒ Z e i d ƒ Z	 e i d	 ƒ Z
 e i d
 ƒ Z e i d ƒ Z e i d ƒ Z e i d e i ƒ Z e i d	 ƒ Z e i d ƒ Z d e f d „  ƒ  YZ d e i f d „  ƒ  YZ d S(   s   A parser for HTML and XHTML.iÿÿÿÿNs   [&<]s   <(/|\Z)s
   &[a-zA-Z#]s%   &([a-zA-Z][-.a-zA-Z0-9]*)[^a-zA-Z0-9]s)   &#(?:[0-9]+|[xX][0-9a-fA-F]+)[^0-9a-fA-F]s	   <[a-zA-Z]t   >s   --\s*>s   [a-zA-Z][-.a-zA-Z0-9:_]*s_   \s*([a-zA-Z_][-.:a-zA-Z_0-9]*)(\s*=\s*(\'[^\']*\'|"[^"]*"|[-a-zA-Z0-9./,:;+*%?!&$\(\)_#=~@]*))?sê  
  <[a-zA-Z][-.a-zA-Z0-9:_]*          # tag name
  (?:\s+                             # whitespace before attribute name
    (?:[a-zA-Z_][-.:a-zA-Z0-9_]*     # attribute name
      (?:\s*=\s*                     # value indicator
        (?:'[^']*'                   # LITA-enclosed value
          |\"[^\"]*\"                # LIT-enclosed value
          |[^'\">\s]+                # bare value
         )
       )?
     )
   )*
  \s*                                # trailing whitespace
s#   </\s*([a-zA-Z][-.a-zA-Z0-9:_]*)\s*>t   HTMLParseErrorc           B   s#   e  Z d  Z d d „ Z d „  Z RS(   s&   Exception raised for all parse errors.c         C   s'   | |  _  | d |  _ | d |  _ d  S(   Ni    i   (   t   msgt   linenot   offset(   t   selfR   t   position(    (    s'   /mit/python/lib/python2.6/HTMLParser.pyt   __init__4   s    	c         C   s[   |  i  } |  i d  j	 o | d |  i } n |  i d  j	 o | d |  i d } n | S(   Ns   , at line %ds   , column %di   (   R   R   t   NoneR   (   R   t   result(    (    s'   /mit/python/lib/python2.6/HTMLParser.pyt   __str__:   s    	N(   NN(   t   __name__t
   __module__t   __doc__R   R   R
   (    (    (    s'   /mit/python/lib/python2.6/HTMLParser.pyR   1   s   t
   HTMLParserc           B   sø   e  Z d  Z d Z d „  Z d „  Z d „  Z d „  Z d „  Z d Z
 d „  Z d	 „  Z d
 „  Z d „  Z d „  Z d „  Z d „  Z d „  Z d „  Z d „  Z d „  Z d „  Z d „  Z d „  Z d „  Z d „  Z d „  Z d „  Z d Z d „  Z RS(   sÇ  Find tags and other markup and call handler functions.

    Usage:
        p = HTMLParser()
        p.feed(data)
        ...
        p.close()

    Start tags are handled by calling self.handle_starttag() or
    self.handle_startendtag(); end tags by self.handle_endtag().  The
    data between tags is passed from the parser to the derived class
    by calling self.handle_data() with the data as argument (the data
    may be split up in arbitrary chunks).  Entity references are
    passed by calling self.handle_entityref() with the entity
    reference as the argument.  Numeric character references are
    passed to self.handle_charref() with the string containing the
    reference as the argument.
    t   scriptt   stylec         C   s   |  i  ƒ  d S(   s#   Initialize and reset this instance.N(   t   reset(   R   (    (    s'   /mit/python/lib/python2.6/HTMLParser.pyR   Z   s    c         C   s/   d |  _  d |  _ t |  _ t i i |  ƒ d S(   s1   Reset this instance.  Loses all unprocessed data.t    s   ???N(   t   rawdatat   lasttagt   interesting_normalt   interestingt
   markupbaset
   ParserBaseR   (   R   (    (    s'   /mit/python/lib/python2.6/HTMLParser.pyR   ^   s    			c         C   s!   |  i  | |  _  |  i d ƒ d S(   s   Feed data to the parser.

        Call this as often as you want, with as little or as much text
        as you want (may include '
').
        i    N(   R   t   goahead(   R   t   data(    (    s'   /mit/python/lib/python2.6/HTMLParser.pyt   feede   s    c         C   s   |  i  d ƒ d S(   s   Handle any buffered data.i   N(   R   (   R   (    (    s'   /mit/python/lib/python2.6/HTMLParser.pyt   closen   s    c         C   s   t  | |  i ƒ  ƒ ‚ d  S(   N(   R   t   getpos(   R   t   message(    (    s'   /mit/python/lib/python2.6/HTMLParser.pyt   errorr   s    c         C   s   |  i  S(   s)   Return full source of start tag: '<...>'.(   t   _HTMLParser__starttag_text(   R   (    (    s'   /mit/python/lib/python2.6/HTMLParser.pyt   get_starttag_textw   s    c         C   s   t  |  _ d  S(   N(   t   interesting_cdataR   (   R   (    (    s'   /mit/python/lib/python2.6/HTMLParser.pyt   set_cdata_mode{   s    c         C   s   t  |  _ d  S(   N(   R   R   (   R   (    (    s'   /mit/python/lib/python2.6/HTMLParser.pyt   clear_cdata_mode~   s    c   
      C   sÝ  |  i  } d } t | ƒ } xp| | j  ob|  i i | | ƒ } | o | i ƒ  } n | } | | j  o |  i | | | !ƒ n |  i | | ƒ } | | j o Pn | i } | d | ƒ ot i	 | | ƒ o |  i
 | ƒ } nº | d | ƒ o |  i | ƒ } n— | d | ƒ o |  i | ƒ } nt | d | ƒ o |  i | ƒ } nQ | d | ƒ o |  i | ƒ } n. | d | j  o |  i d ƒ | d } n P| d j  o | o |  i d ƒ n Pn |  i | | ƒ } q | d	 | ƒ o… t i	 | | ƒ } | og | i ƒ  d
 d !}	 |  i |	 ƒ | i ƒ  } | d | d ƒ p | d } n |  i | | ƒ } q q‰Pq | d | ƒ ot i	 | | ƒ } | oc | i d ƒ }	 |  i |	 ƒ | i ƒ  } | d | d ƒ p | d } n |  i | | ƒ } q n t i	 | | ƒ } | o4 | o( | i ƒ  | | j o |  i d ƒ n Pq‰| d | j  o' |  i d ƒ |  i | | d ƒ } q‰Pq q W| o7 | | j  o* |  i | | | !ƒ |  i | | ƒ } n | | |  _  d  S(   Ni    t   <s   </s   <!--s   <?s   <!i   s   EOF in middle of constructs   &#i   iÿÿÿÿt   ;t   &s#   EOF in middle of entity or char ref(   R   t   lenR   t   searcht   startt   handle_datat	   updatepost
   startswitht   starttagopent   matcht   parse_starttagt   parse_endtagt   parse_commentt   parse_pit   parse_declarationR   t   charreft   groupt   handle_charreft   endt	   entityreft   handle_entityreft
   incomplete(
   R   R8   R   t   it   nR/   t   jR-   t   kt   name(    (    s'   /mit/python/lib/python2.6/HTMLParser.pyR   „   sŒ    	   	c         C   s_   |  i  } t i | | d ƒ } | p d S| i ƒ  } |  i | | d | !ƒ | i ƒ  } | S(   Ni   iÿÿÿÿ(   R   t   picloseR)   R*   t	   handle_piR8   (   R   R<   R   R/   R>   (    (    s'   /mit/python/lib/python2.6/HTMLParser.pyR3   Ô   s    	c      
   C   sƒ  d  |  _ |  i | ƒ } | d j  o | S|  i } | | | !|  _ g  } t i | | d ƒ } | i ƒ  } | | d | !i ƒ  |  _ } xð | | j  oâ t	 i | | ƒ } | p Pn | i
 d d d ƒ \ }	 }
 } |
 p
 d  } nm | d  d j o | d j n p& | d  d j o | d j n o  | d d !} |  i | ƒ } n | i |	 i ƒ  | f ƒ | i ƒ  } qŒ W| | | !i ƒ  } | d j o‘ |  i ƒ  \ } } d
 |  i j o9 | |  i i d
 ƒ } t |  i ƒ |  i i d
 ƒ } n | t |  i ƒ } |  i d | | | !d  f ƒ n | i d	 ƒ o |  i | | ƒ n/ |  i | | ƒ | |  i j o |  i ƒ  n | S(   Ni    i   i   i   s   'iÿÿÿÿt   "R    s   />s   
s    junk characters in start tag: %ri   (   R    s   />(   R   R    t   check_for_whole_start_tagR   t   tagfindR/   R8   t   lowerR   t   attrfindR6   t   unescapet   appendt   stripR   t   countR(   t   rfindR   t   endswitht   handle_startendtagt   handle_starttagt   CDATA_CONTENT_ELEMENTSR#   (   R   R<   t   endposR   t   attrsR/   R?   t   tagt   mt   attrnamet   restt	   attrvalueR8   R   R   (    (    s'   /mit/python/lib/python2.6/HTMLParser.pyR0   à   sP    		 
&&	c         C   s  |  i  } t i | | ƒ } | oÞ | i ƒ  } | | | d !} | d j o	 | d S| d j oY | i d | ƒ o	 | d S| i d | ƒ o d S|  i | | d ƒ |  i d ƒ n | d j o d S| d	 j o d S|  i | | ƒ |  i d
 ƒ n t d ƒ ‚ d  S(   Ni   R    t   /s   />i   iÿÿÿÿs   malformed empty start tagR   s6   abcdefghijklmnopqrstuvwxyz=/ABCDEFGHIJKLMNOPQRSTUVWXYZs   malformed start tags   we should not get here!(   R   t   locatestarttagendR/   R8   R-   R,   R   t   AssertionError(   R   R<   R   RT   R>   t   next(    (    s'   /mit/python/lib/python2.6/HTMLParser.pyRD     s*    			c         C   sŸ   |  i  } t i | | d ƒ } | p d S| i ƒ  } t i | | ƒ } | p |  i d | | | !f ƒ n | i d ƒ } |  i | i	 ƒ  ƒ |  i
 ƒ  | S(   Ni   iÿÿÿÿs   bad end tag: %r(   R   t	   endendtagR)   R8   t
   endtagfindR/   R   R6   t   handle_endtagRF   R$   (   R   R<   R   R/   R>   RS   (    (    s'   /mit/python/lib/python2.6/HTMLParser.pyR1   1  s    	
c         C   s!   |  i  | | ƒ |  i | ƒ d  S(   N(   RO   R^   (   R   RS   RR   (    (    s'   /mit/python/lib/python2.6/HTMLParser.pyRN   A  s    c         C   s   d  S(   N(    (   R   RS   RR   (    (    s'   /mit/python/lib/python2.6/HTMLParser.pyRO   F  s    c         C   s   d  S(   N(    (   R   RS   (    (    s'   /mit/python/lib/python2.6/HTMLParser.pyR^   J  s    c         C   s   d  S(   N(    (   R   R@   (    (    s'   /mit/python/lib/python2.6/HTMLParser.pyR7   N  s    c         C   s   d  S(   N(    (   R   R@   (    (    s'   /mit/python/lib/python2.6/HTMLParser.pyR:   R  s    c         C   s   d  S(   N(    (   R   R   (    (    s'   /mit/python/lib/python2.6/HTMLParser.pyR+   V  s    c         C   s   d  S(   N(    (   R   R   (    (    s'   /mit/python/lib/python2.6/HTMLParser.pyt   handle_commentZ  s    c         C   s   d  S(   N(    (   R   t   decl(    (    s'   /mit/python/lib/python2.6/HTMLParser.pyt   handle_decl^  s    c         C   s   d  S(   N(    (   R   R   (    (    s'   /mit/python/lib/python2.6/HTMLParser.pyRB   b  s    c         C   s   |  i  d | f ƒ d  S(   Ns   unknown declaration: %r(   R   (   R   R   (    (    s'   /mit/python/lib/python2.6/HTMLParser.pyt   unknown_decle  s    c            s4   d | j o | S‡  f d †  } t  i d | | ƒ S(   NR'   c            s   |  i  ƒ  d }  |  d d j oI |  d }  |  d d j o t |  d d ƒ } n t |  ƒ } t | ƒ Sd d  k } t i d  j oH h d d	 6} t _ x1 | i i ƒ  D] \ } } t | ƒ | | <qª Wn y ˆ  i |  SWn t	 j
 o d
 |  d SXd  S(   Ni    t   #i   t   xt   Xi   iÿÿÿÿu   't   aposR'   R&   (   Rd   Re   (
   t   groupst   intt   unichrt   htmlentitydefsR   t
   entitydefsR   t   name2codepointt	   iteritemst   KeyError(   t   st   cRj   Rk   R?   t   v(   R   (    s'   /mit/python/lib/python2.6/HTMLParser.pyt   replaceEntitiesm  s"    
 s#   &(#?[xX]?(?:[0-9a-fA-F]+|\w{1,8}));(   t   ret   sub(   R   Ro   Rr   (    (   R   s'   /mit/python/lib/python2.6/HTMLParser.pyRH   j  s    (   s   scriptR   N(   R   R   R   RP   R   R   R   R   R   R   R    R!   R#   R$   R   R3   R0   RD   R1   RN   RO   R^   R7   R:   R+   R_   Ra   RB   Rb   Rk   RH   (    (    (    s'   /mit/python/lib/python2.6/HTMLParser.pyR   C   s8   										P		3												(   R   R   Rs   t   compileR   R"   R;   R9   R5   R.   RA   t   commentcloseRE   RG   t   VERBOSERY   R\   R]   t	   ExceptionR   R   R   (    (    (    s'   /mit/python/lib/python2.6/HTMLParser.pyt   <module>   s&   
	