mò
­fIc           @   s  d  Z  d k Z d k Z e i d ƒ Z e i d ƒ Z e i d ƒ Z e i d ƒ Z e i d ƒ Z e i d ƒ Z	 e i d ƒ Z
 e i d	 ƒ Z e i d
 ƒ Z e i d ƒ Z e i d e i ƒ Z e i d ƒ Z e i d ƒ Z d e f d „  ƒ  YZ d e i f d „  ƒ  YZ d S(   s   A parser for HTML and XHTML.Ns   [&<]s   <(/|\Z)s
   &[a-zA-Z#]s%   &([a-zA-Z][-.a-zA-Z0-9]*)[^a-zA-Z0-9]s)   &#(?:[0-9]+|[xX][0-9a-fA-F]+)[^0-9a-fA-F]s	   <[a-zA-Z]t   >s   --\s*>s   [a-zA-Z][-.a-zA-Z0-9:_]*s_   \s*([a-zA-Z_][-.:a-zA-Z_0-9]*)(\s*=\s*(\'[^\']*\'|"[^"]*"|[-a-zA-Z0-9./,:;+*%?!&$\(\)_#=~@]*))?sê  
  <[a-zA-Z][-.a-zA-Z0-9:_]*          # tag name
  (?:\s+                             # whitespace before attribute name
    (?:[a-zA-Z_][-.:a-zA-Z0-9_]*     # attribute name
      (?:\s*=\s*                     # value indicator
        (?:'[^']*'                   # LITA-enclosed value
          |\"[^\"]*\"                # LIT-enclosed value
          |[^'\">\s]+                # bare value
         )
       )?
     )
   )*
  \s*                                # trailing whitespace
s#   </\s*([a-zA-Z][-.a-zA-Z0-9:_]*)\s*>t   HTMLParseErrorc           B   s)   t  Z d  Z e e f d „ Z d „  Z RS(   s&   Exception raised for all parse errors.c         C   s5   | p t ‚ | |  _  | d |  _ | d |  _ d  S(   Ni    i   (   t   msgt   AssertionErrort   selft   positiont   linenot   offset(   R   R   R   (    (    t'   /mit/python/lib/python2.4/HTMLParser.pyt   __init__4   s    	c         C   s[   |  i } |  i d  j	 o | d |  i } n |  i d  j	 o | d |  i d } n | S(   Ns   , at line %ds   , column %di   (   R   R   t   resultR   t   NoneR   (   R   R
   (    (    R   t   __str__:   s    	(   t   __name__t
   __module__t   __doc__R   R	   R   (    (    (    R   R   1   s    t
   HTMLParserc           B   sò   t  Z d  Z d Z d „  Z d „  Z d „  Z d „  Z d „  Z e	 Z
 d „  Z d	 „  Z d
 „  Z d „  Z d „  Z d „  Z d „  Z d „  Z d „  Z d „  Z d „  Z d „  Z d „  Z d „  Z d „  Z d „  Z d „  Z d „  Z d „  Z RS(   sÇ  Find tags and other markup and call handler functions.

    Usage:
        p = HTMLParser()
        p.feed(data)
        ...
        p.close()

    Start tags are handled by calling self.handle_starttag() or
    self.handle_startendtag(); end tags by self.handle_endtag().  The
    data between tags is passed from the parser to the derived class
    by calling self.handle_data() with the data as argument (the data
    may be split up in arbitrary chunks).  Entity references are
    passed by calling self.handle_entityref() with the entity
    reference as the argument.  Numeric character references are
    passed to self.handle_charref() with the string containing the
    reference as the argument.
    t   scriptt   stylec         C   s   |  i ƒ  d S(   s#   Initialize and reset this instance.N(   R   t   reset(   R   (    (    R   R	   Z   s     c         C   s/   d |  _ d |  _ t |  _ t i i |  ƒ d S(   s1   Reset this instance.  Loses all unprocessed data.t    s   ???N(   R   t   rawdatat   lasttagt   interesting_normalt   interestingt
   markupbaset
   ParserBaseR   (   R   (    (    R   R   ^   s
     			c         C   s!   |  i | |  _ |  i d ƒ d S(   s   Feed data to the parser.

        Call this as often as you want, with as little or as much text
        as you want (may include '
').
        i    N(   R   R   t   datat   goahead(   R   R   (    (    R   t   feede   s     c         C   s   |  i d ƒ d S(   s   Handle any buffered data.i   N(   R   R   (   R   (    (    R   t   closen   s     c         C   s   t  | |  i ƒ  ƒ ‚ d  S(   N(   R   t   messageR   t   getpos(   R   R   (    (    R   t   errorr   s    c         C   s   |  i S(   s)   Return full source of start tag: '<...>'.N(   R   t   _HTMLParser__starttag_text(   R   (    (    R   t   get_starttag_textw   s     c         C   s   t  |  _ d  S(   N(   t   interesting_cdataR   R   (   R   (    (    R   t   set_cdata_mode{   s    c         C   s   t  |  _ d  S(   N(   R   R   R   (   R   (    (    R   t   clear_cdata_mode~   s    c   
      C   sî  |  i } d } t | ƒ } x| | j  os|  i i | | ƒ }	 |	 o |	 i ƒ  } n | } | | j  o |  i
 | | | !ƒ n |  i | | ƒ } | | j o Pn | i } | d | ƒ ot i | | ƒ o |  i | ƒ } nº | d | ƒ o |  i | ƒ } n— | d | ƒ o |  i | ƒ } nt | d | ƒ o |  i | ƒ } nQ | d | ƒ o |  i | ƒ } n. | d | j  o |  i
 d ƒ | d } n P| d j  o | o |  i d ƒ n Pn |  i | | ƒ } q | d	 | ƒ o… t i | | ƒ }	 |	 og |	 i ƒ  d
 d !} |  i | ƒ |	 i ƒ  } | d | d ƒ p | d } n |  i | | ƒ } q qšPq | d | ƒ ot i | | ƒ }	 |	 oc |	 i d ƒ } |  i | ƒ |	 i ƒ  } | d | d ƒ p | d } n |  i | | ƒ } q n t i | | ƒ }	 |	 o4 | o( |	 i ƒ  | | j o |  i d ƒ n Pqš| d | j  o' |  i
 d ƒ |  i | | d ƒ } qšPq d p
 t d ‚ q W| o7 | | j  o* |  i
 | | | !ƒ |  i | | ƒ } n | | |  _ d  S(   Ni    t   <s   </s   <!--s   <?s   <!i   s   EOF in middle of constructs   &#i   iÿÿÿÿt   ;t   &s#   EOF in middle of entity or char refs   interesting.search() lied(   R   R   t   it   lent   nR   t   searcht   matcht   startt   jt   handle_datat	   updatepost
   startswitht   starttagopent   parse_starttagt   kt   parse_endtagt   parse_commentt   parse_pit   parse_declarationt   endR!   t   charreft   groupt   namet   handle_charreft	   entityreft   handle_entityreft
   incompleteR   (
   R   R;   R3   R>   R*   R0   R,   R   R6   R.   (    (    R   R   „   sŒ    	   	c         C   s„   |  i } | | | d !d j p
 t d ‚ t i | | d ƒ } | p d Sn | i ƒ  } |  i	 | | d | !ƒ | i
 ƒ  } | S(   Ni   s   <?s   unexpected call to parse_pi()iÿÿÿÿ(   R   R   R*   R   t   picloseR-   R.   R/   R0   t	   handle_piR;   (   R   R*   R0   R   R.   (    (    R   R9   Ô   s    	"c         C   s—  d  |  _ |  i | ƒ } | d j  o | Sn |  i } | | | !|  _ g  } t i	 | | d ƒ } | p
 t
 d ‚ | i ƒ  } | | d | !i ƒ  |  _ } xð | | j  oâ t i	 | | ƒ } | p Pn | i d d d ƒ \ } }
 } |
 p
 d  } nm | d  d j o | d j n p& | d  d j o | d j n o  | d d !} |  i | ƒ } n | i | i ƒ  | f ƒ | i ƒ  } q  W| | | !i ƒ  } | d j o‘ |  i ƒ  \ } }	 d |  i j o9 | |  i i d ƒ } t |  i ƒ |  i i d ƒ }	 n |	 t |  i ƒ }	 |  i d | | | !d  f ƒ n | i  d
 ƒ o |  i! | | ƒ n/ |  i" | | ƒ | |  i# j o |  i$ ƒ  n | S(   Ni    i   s#   unexpected call to parse_starttag()i   i   s   'iÿÿÿÿt   "R    s   />s   
s    junk characters in start tag: %ri   (   R    s   />(%   R   R   R"   t   check_for_whole_start_tagR*   t   endposR   t   attrst   tagfindR.   R   R;   R6   t   lowerR   t   tagt   attrfindt   mR=   t   attrnamet   restt	   attrvaluet   unescapet   appendt   stripR    R   R   t   countR+   t   rfindR!   t   endswitht   handle_startendtagt   handle_starttagt   CDATA_CONTENT_ELEMENTSR%   (   R   R*   R;   RP   RM   RN   R   RK   R   R   RO   R.   R6   RG   RH   (    (    R   R5   à   sL    		 
L##c         C   s  |  i } t i | | ƒ } | oí | i ƒ  } | | | d !} | d j o | d Sn | d j o_ | i	 d | ƒ o | d Sn | i	 d | ƒ o d Sn |  i
 | | d ƒ |  i d ƒ n | d j o d Sn | d	 j o d Sn |  i
 | | ƒ |  i d
 ƒ n t d ƒ ‚ d  S(   Ni   R    t   /s   />i   iÿÿÿÿs   malformed empty start tagR   s6   abcdefghijklmnopqrstuvwxyz=/ABCDEFGHIJKLMNOPQRSTUVWXYZs   malformed start tags   we should not get here!(   R   R   t   locatestarttagendR.   R*   RM   R;   R0   t   nextR3   R2   R!   R   (   R   R*   R0   RM   R\   R   (    (    R   RF     s*    	c         C   sÄ   |  i } | | | d !d j p
 t d ‚ t i | | d ƒ } | p d Sn | i ƒ  } t	 i | | ƒ } | p |  i
 d | | | !f ƒ n | i d ƒ } |  i | i ƒ  ƒ |  i ƒ  | S(   Ni   s   </s   unexpected call to parse_endtagi   iÿÿÿÿs   bad end tag: %r(   R   R   R*   R   t	   endendtagR-   R.   R;   R0   t
   endtagfindR!   R=   RK   t   handle_endtagRJ   R&   (   R   R*   R0   RK   R   R.   (    (    R   R7   1  s    	"
c         C   s!   |  i | | ƒ |  i | ƒ d  S(   N(   R   RX   RK   RH   R_   (   R   RK   RH   (    (    R   RW   A  s    c         C   s   d  S(   N(    (   R   RK   RH   (    (    R   RX   F  s    c         C   s   d  S(   N(    (   R   RK   (    (    R   R_   J  s    c         C   s   d  S(   N(    (   R   R>   (    (    R   R?   N  s    c         C   s   d  S(   N(    (   R   R>   (    (    R   RA   R  s    c         C   s   d  S(   N(    (   R   R   (    (    R   R1   V  s    c         C   s   d  S(   N(    (   R   R   (    (    R   t   handle_commentZ  s    c         C   s   d  S(   N(    (   R   t   decl(    (    R   t   handle_decl^  s    c         C   s   d  S(   N(    (   R   R   (    (    R   RD   b  s    c         C   s   |  i d | f ƒ d  S(   Ns   unknown declaration: %r(   R   R!   R   (   R   R   (    (    R   t   unknown_decle  s    c         C   ss   d | j o | Sn | i d d ƒ } | i d d ƒ } | i d d ƒ } | i d d	 ƒ } | i d
 d ƒ } | S(   NR)   s   &lt;R'   s   &gt;R    s   &apos;t   's   &quot;RE   s   &amp;(   t   st   replace(   R   Re   (    (    R   RQ   i  s    (   s   scriptR   (   R   R   R   RY   R	   R   R   R   R!   R   R"   R#   R%   R&   R   R9   R5   RF   R7   RW   RX   R_   R?   RA   R1   R`   Rb   RD   Rc   RQ   (    (    (    R   R   C   s6    										P		3												(   R   R   t   ret   compileR   R$   RB   R@   R<   R4   RC   t   commentcloseRI   RL   t   VERBOSER[   R]   R^   t	   ExceptionR   R   R   (   R4   R   R[   R   R<   R   R]   R$   RI   R   R@   RL   Rg   R^   RC   Ri   RB   (    (    R   t   ?   s"   
		