mò
­fIc           @   s‚   d  Z  d k Z d k l Z d d g Z d e i f d „  ƒ  YZ d e i f d „  ƒ  YZ e	 d „ Z
 e d j o e
 ƒ  n d S(	   sv   HTML 2.0 parser.

See the HTML 2.0 specification:
http://www.w3.org/hypertext/WWW/MarkUp/html-spec/html-spec_toc.html
N(   s   AS_ISt
   HTMLParsert   HTMLParseErrorc           B   s   t  Z d  Z RS(   s3   Error raised when an HTML document can't be parsed.(   t   __name__t
   __module__t   __doc__(    (    (    t$   /mit/python/lib/python2.4/htmllib.pyR      s   c           B   s0  t  Z d  Z d k l Z d d „ Z d „  Z d „  Z d „  Z d „  Z	 d „  Z
 d	 „  Z d
 „  Z d „  Z d „  Z d „  Z d „  Z d „  Z d „  Z d „  Z d „  Z d „  Z d „  Z d „  Z d „  Z d „  Z d „  Z d „  Z d „  Z d „  Z d „  Z d „  Z d „  Z  d „  Z! d  „  Z" d! „  Z# d" „  Z$ d# „  Z% d$ „  Z& d% „  Z' d& „  Z( d' „  Z) d( „  Z* d) „  Z+ d* „  Z, d+ „  Z- d, „  Z. d- „  Z/ d. „  Z0 d/ „  Z1 d0 „  Z2 d1 „  Z3 d2 „  Z4 d3 „  Z5 d4 „  Z6 d5 „  Z7 d6 „  Z8 d7 „  Z9 d8 „  Z: d9 „  Z; d: „  Z< d; „  Z= d< „  Z> d d= „ Z? d> „  Z@ d? „  ZA d@ „  ZB dA „  ZC dB „  ZD dC „  ZE dD „  ZF dE „  ZG dF „  ZH dG „  ZI dH „  ZJ dI „  ZK dJ „  ZL dK „  ZM dL „  ZN dM „  ZO dN „  ZP dO „  ZQ dP „  ZR dQ „  ZS dR „  ZT dS „  ZU dT „  ZV dU „  ZW dV „  ZX dW „  ZY dX „  ZZ dY „  Z[ RS(Z   sÌ   This is the basic HTML parser class.

    It supports all entity names required by the XHTML 1.0 Recommendation.
    It also defines handlers for all HTML 2.0 and many HTML 3.0 and 3.2
    elements.

    (   s
   entitydefsi    c         C   s    t  i i |  | ƒ | |  _ d S(   s•   Creates an instance of the HTMLParser class.

        The formatter parameter is the formatter instance associated with
        the parser.

        N(   t   sgmllibt
   SGMLParsert   __init__t   selft   verboset	   formatter(   R	   R   R
   (    (    R   R      s     c         C   s   t  | ƒ ‚ d  S(   N(   R   t   message(   R	   R   (    (    R   t   error'   s    c         C   s\   t  i i |  ƒ d  |  _ d |  _ d  |  _ d  |  _ d  |  _	 g  |  _
 d |  _ g  |  _ d  S(   Ni    (   R   R   t   resetR	   t   Nonet   savedatat   isindext   titlet   baset   anchort
   anchorlistt   nofillt
   list_stack(   R	   (    (    R   R   *   s    							c         C   sV   |  i d  j	 o |  i | |  _ n/ |  i o |  i i | ƒ n |  i i | ƒ d  S(   N(   R	   R   R   t   dataR   R   t   add_literal_datat   add_flowing_data(   R	   R   (    (    R   t   handle_data:   s
    
c         C   s   d |  _ d S(   sê   Begins saving character data in a buffer instead of sending it
        to the formatter object.

        Retrieve the stored data via the save_end() method.  Use of the
        save_bgn() / save_end() pair may not be nested.

        t    N(   R	   R   (   R	   (    (    R   t   save_bgnE   s     c         C   s9   |  i } d |  _ |  i p d i | i ƒ  ƒ } n | S(   sH  Ends buffering character data and returns all data saved since
        the preceding call to the save_bgn() method.

        If the nofill flag is false, whitespace is collapsed to single
        spaces.  A call to this method without a preceding call to the
        save_bgn() method will raise a TypeError exception.

        t    N(   R	   R   R   R   R   t   joint   split(   R	   R   (    (    R   t   save_endO   s     		
c         C   s+   | |  _ |  i o |  i i | ƒ n d S(   s}  This method is called at the start of an anchor region.

        The arguments correspond to the attributes of the <A> tag with
        the same names.  The default implementation maintains a list of
        hyperlinks (defined by the HREF attribute for <A> tags) within
        the document.  The list of hyperlinks is available as the data
        attribute anchorlist.

        N(   t   hrefR	   R   R   t   append(   R	   R"   t   namet   type(    (    R   t
   anchor_bgn`   s    	 	
c         C   s5   |  i o' |  i d t |  i ƒ ƒ d |  _ n d S(   sØ   This method is called at the end of an anchor region.

        The default implementation adds a textual footnote marker using an
        index into the list of hyperlinks created by the anchor_bgn()method.

        s   [%d]N(   R	   R   R   t   lenR   R   (   R	   (    (    R   t
   anchor_endn   s     
c         G   s   |  i | ƒ d S(   s–   This method is called to handle images.

        The default implementation simply passes the alt value to the
        handle_data() method.

        N(   R	   R   t   alt(   R	   t   srcR)   t   args(    (    R   t   handle_image{   s     c         C   s   d  S(   N(    (   R	   t   attrs(    (    R   t
   start_html†   s    c         C   s   d  S(   N(    (   R	   (    (    R   t   end_html‡   s    c         C   s   d  S(   N(    (   R	   R-   (    (    R   t
   start_head‰   s    c         C   s   d  S(   N(    (   R	   (    (    R   t   end_headŠ   s    c         C   s   d  S(   N(    (   R	   R-   (    (    R   t
   start_bodyŒ   s    c         C   s   d  S(   N(    (   R	   (    (    R   t   end_body   s    c         C   s   |  i ƒ  d  S(   N(   R	   R   (   R	   R-   (    (    R   t   start_title‘   s    c         C   s   |  i ƒ  |  _ d  S(   N(   R	   R!   R   (   R	   (    (    R   t	   end_title”   s    c         C   s5   x. | D]& \ } } | d j o | |  _ q q Wd  S(   NR"   (   R-   t   at   vR	   R   (   R	   R-   R6   R7   (    (    R   t   do_base—   s     c         C   s   d |  _ d  S(   Ni   (   R	   R   (   R	   R-   (    (    R   t
   do_isindexœ   s    c         C   s   d  S(   N(    (   R	   R-   (    (    R   t   do_linkŸ   s    c         C   s   d  S(   N(    (   R	   R-   (    (    R   t   do_meta¢   s    c         C   s   d  S(   N(    (   R	   R-   (    (    R   t	   do_nextid¥   s    c         C   s$   |  i i d ƒ |  i i d ƒ d  S(   Ni   t   h1i    (   R=   i    i   i    (   R	   R   t   end_paragrapht	   push_font(   R	   R-   (    (    R   t   start_h1¬   s    c         C   s!   |  i i d ƒ |  i i ƒ  d  S(   Ni   (   R	   R   R>   t   pop_font(   R	   (    (    R   t   end_h1°   s    c         C   s$   |  i i d ƒ |  i i d ƒ d  S(   Ni   t   h2i    (   RC   i    i   i    (   R	   R   R>   R?   (   R	   R-   (    (    R   t   start_h2´   s    c         C   s!   |  i i d ƒ |  i i ƒ  d  S(   Ni   (   R	   R   R>   RA   (   R	   (    (    R   t   end_h2¸   s    c         C   s$   |  i i d ƒ |  i i d ƒ d  S(   Ni   t   h3i    (   RF   i    i   i    (   R	   R   R>   R?   (   R	   R-   (    (    R   t   start_h3¼   s    c         C   s!   |  i i d ƒ |  i i ƒ  d  S(   Ni   (   R	   R   R>   RA   (   R	   (    (    R   t   end_h3À   s    c         C   s$   |  i i d ƒ |  i i d ƒ d  S(   Ni   t   h4i    (   RI   i    i   i    (   R	   R   R>   R?   (   R	   R-   (    (    R   t   start_h4Ä   s    c         C   s!   |  i i d ƒ |  i i ƒ  d  S(   Ni   (   R	   R   R>   RA   (   R	   (    (    R   t   end_h4È   s    c         C   s$   |  i i d ƒ |  i i d ƒ d  S(   Ni   t   h5i    (   RL   i    i   i    (   R	   R   R>   R?   (   R	   R-   (    (    R   t   start_h5Ì   s    c         C   s!   |  i i d ƒ |  i i ƒ  d  S(   Ni   (   R	   R   R>   RA   (   R	   (    (    R   t   end_h5Ð   s    c         C   s$   |  i i d ƒ |  i i d ƒ d  S(   Ni   t   h6i    (   RO   i    i   i    (   R	   R   R>   R?   (   R	   R-   (    (    R   t   start_h6Ô   s    c         C   s!   |  i i d ƒ |  i i ƒ  d  S(   Ni   (   R	   R   R>   RA   (   R	   (    (    R   t   end_h6Ø   s    c         C   s   |  i i d ƒ d  S(   Ni   (   R	   R   R>   (   R	   R-   (    (    R   t   do_pÞ   s    c         C   s@   |  i i d ƒ |  i i t t t d f ƒ |  i d |  _ d  S(   Ni   (   R	   R   R>   R?   t   AS_ISR   (   R	   R-   (    (    R   t	   start_preá   s    c         C   s:   |  i i d ƒ |  i i ƒ  t d |  i d ƒ |  _ d  S(   Ni   i    (   R	   R   R>   RA   t   maxR   (   R	   (    (    R   t   end_preæ   s    c         C   s   |  i | ƒ |  i d ƒ d  S(   Nt   xmp(   R	   RT   R-   t
   setliteral(   R	   R-   (    (    R   t	   start_xmpë   s    c         C   s   |  i ƒ  d  S(   N(   R	   RV   (   R	   (    (    R   t   end_xmpï   s    c         C   s   |  i | ƒ |  i d ƒ d  S(   Nt   listing(   R	   RT   R-   RX   (   R	   R-   (    (    R   t   start_listingò   s    c         C   s   |  i ƒ  d  S(   N(   R	   RV   (   R	   (    (    R   t   end_listingö   s    c         C   s0   |  i i d ƒ |  i i t d t t f ƒ d  S(   Ni    i   (   R	   R   R>   R?   RS   (   R	   R-   (    (    R   t   start_addressù   s    c         C   s!   |  i i d ƒ |  i i ƒ  d  S(   Ni    (   R	   R   R>   RA   (   R	   (    (    R   t   end_addressý   s    c         C   s$   |  i i d ƒ |  i i d ƒ d  S(   Ni   t
   blockquote(   R	   R   R>   t   push_margin(   R	   R-   (    (    R   t   start_blockquote  s    c         C   s!   |  i i d ƒ |  i i ƒ  d  S(   Ni   (   R	   R   R>   t
   pop_margin(   R	   (    (    R   t   end_blockquote  s    c         C   sA   |  i i |  i ƒ |  i i d ƒ |  i i d d d g ƒ d  S(   Nt   ult   *i    (   R	   R   R>   R   Ra   R#   (   R	   R-   (    (    R   t   start_ul  s    c         C   s=   |  i o |  i d =n |  i i |  i ƒ |  i i ƒ  d  S(   Niÿÿÿÿ(   R	   R   R   R>   Rc   (   R	   (    (    R   t   end_ul  s    
 c         C   sm   |  i i d ƒ |  i o0 |  i d \ } } } } | d | d <} n d \ } } |  i i | | ƒ d  S(   Ni    iÿÿÿÿi   i   Rf   (   Rf   i    (	   R	   R   R>   R   t   dummyt   labelt   countert   topt   add_label_data(   R	   R-   Ri   Rl   Rk   Rj   (    (    R   t   do_li  s    
c         C   s–   |  i i |  i ƒ |  i i d ƒ d } xL | D]D \ } } | d j o+ t	 | ƒ d j o | d } n | } q1 q1 W|  i i
 d | d g ƒ d  S(   Nt   ols   1.R%   i   t   .i    (   R	   R   R>   R   Ra   Rj   R-   R6   R7   R'   R#   (   R	   R-   R6   Rj   R7   (    (    R   t   start_ol  s      c         C   s=   |  i o |  i d =n |  i i |  i ƒ |  i i ƒ  d  S(   Niÿÿÿÿ(   R	   R   R   R>   Rc   (   R	   (    (    R   t   end_ol(  s    
 c         C   s   |  i | ƒ d  S(   N(   R	   Rg   R-   (   R	   R-   (    (    R   t
   start_menu-  s    c         C   s   |  i ƒ  d  S(   N(   R	   Rh   (   R	   (    (    R   t   end_menu0  s    c         C   s   |  i | ƒ d  S(   N(   R	   Rg   R-   (   R	   R-   (    (    R   t	   start_dir3  s    c         C   s   |  i ƒ  d  S(   N(   R	   Rh   (   R	   (    (    R   t   end_dir6  s    c         C   s-   |  i i d ƒ |  i i d d d g ƒ d  S(   Ni   t   dlR   i    (   R	   R   R>   R   R#   (   R	   R-   (    (    R   t   start_dl9  s    c         C   s)   |  i d ƒ |  i o |  i d =n d  S(   Ni   iÿÿÿÿ(   R	   t   ddpopR   (   R	   (    (    R   t   end_dl=  s    
 c         C   s   |  i ƒ  d  S(   N(   R	   Ry   (   R	   R-   (    (    R   t   do_dtA  s    c         C   s7   |  i ƒ  |  i i d ƒ |  i i d d d g ƒ d  S(   Nt   ddR   i    (   R	   Ry   R   Ra   R   R#   (   R	   R-   (    (    R   t   do_ddD  s    
c         C   sU   |  i i | ƒ |  i o7 |  i d d d j o |  i d =|  i i ƒ  qQ n d  S(   Niÿÿÿÿi    R|   (   R	   R   R>   t   blR   Rc   (   R	   R~   (    (    R   Ry   I  s
    

c         C   s   |  i | ƒ d  S(   N(   R	   t   start_iR-   (   R	   R-   (    (    R   t
   start_citeT  s    c         C   s   |  i ƒ  d  S(   N(   R	   t   end_i(   R	   (    (    R   t   end_citeU  s    c         C   s   |  i | ƒ d  S(   N(   R	   t   start_ttR-   (   R	   R-   (    (    R   t
   start_codeW  s    c         C   s   |  i ƒ  d  S(   N(   R	   t   end_tt(   R	   (    (    R   t   end_codeX  s    c         C   s   |  i | ƒ d  S(   N(   R	   R   R-   (   R	   R-   (    (    R   t   start_emZ  s    c         C   s   |  i ƒ  d  S(   N(   R	   R   (   R	   (    (    R   t   end_em[  s    c         C   s   |  i | ƒ d  S(   N(   R	   Rƒ   R-   (   R	   R-   (    (    R   t	   start_kbd]  s    c         C   s   |  i ƒ  d  S(   N(   R	   R…   (   R	   (    (    R   t   end_kbd^  s    c         C   s   |  i | ƒ d  S(   N(   R	   Rƒ   R-   (   R	   R-   (    (    R   t
   start_samp`  s    c         C   s   |  i ƒ  d  S(   N(   R	   R…   (   R	   (    (    R   t   end_sampa  s    c         C   s   |  i | ƒ d  S(   N(   R	   t   start_bR-   (   R	   R-   (    (    R   t   start_strongc  s    c         C   s   |  i ƒ  d  S(   N(   R	   t   end_b(   R	   (    (    R   t
   end_strongd  s    c         C   s   |  i | ƒ d  S(   N(   R	   R   R-   (   R	   R-   (    (    R   t	   start_varf  s    c         C   s   |  i ƒ  d  S(   N(   R	   R   (   R	   (    (    R   t   end_varg  s    c         C   s    |  i i t d t t f ƒ d  S(   Ni   (   R	   R   R?   RS   (   R	   R-   (    (    R   R   k  s    c         C   s   |  i i ƒ  d  S(   N(   R	   R   RA   (   R	   (    (    R   R   m  s    c         C   s    |  i i t t d t f ƒ d  S(   Ni   (   R	   R   R?   RS   (   R	   R-   (    (    R   R   p  s    c         C   s   |  i i ƒ  d  S(   N(   R	   R   RA   (   R	   (    (    R   R   r  s    c         C   s    |  i i t t t d f ƒ d  S(   Ni   (   R	   R   R?   RS   (   R	   R-   (    (    R   Rƒ   u  s    c         C   s   |  i i ƒ  d  S(   N(   R	   R   RA   (   R	   (    (    R   R…   w  s    c         C   s—   d } d } d } xk | D]c \ } } | i ƒ  } | d j o
 | } n | d j o
 | } n | d j o | i ƒ  } q q W|  i	 | | | ƒ d  S(   NR   R"   R$   R%   (
   R"   R$   R%   R-   t   attrnamet   valuet   stript   lowerR	   R&   (   R	   R-   R$   R”   R“   R"   R%   (    (    R   t   start_az  s     

c         C   s   |  i ƒ  d  S(   N(   R	   R(   (   R	   (    (    R   t   end_aˆ  s    c         C   s   |  i i ƒ  d  S(   N(   R	   R   t   add_line_break(   R	   R-   (    (    R   t   do_br  s    c         C   s   |  i i ƒ  d  S(   N(   R	   R   t   add_hor_rule(   R	   R-   (    (    R   t   do_hr’  s    c   
      C   s%  d } d } d } d } d } d }	 xÞ | D]Ö \ } } | d j o
 | } n | d j o
 | } n | d j o
 | } n | d j o
 | } n | d j o* y t	 | ƒ } WqÊ t
 j
 o qÊ Xn | d	 j o* y t	 | ƒ }	 Wqt
 j
 o qXq+ q+ W|  i | | | | | |	 ƒ d  S(
   NR   s   (image)i    t   alignR)   t   ismapR*   t   widtht   height(   R   R)   Rž   R*   RŸ   R    R-   R“   R”   t   intt
   ValueErrorR	   R,   (
   R	   R-   R*   R   R”   Rž   R“   RŸ   R)   R    (    (    R   t   do_img—  s6     



  	  c         C   s   |  i | ƒ |  i ƒ  d  S(   N(   R	   RT   R-   t   setnomoretags(   R	   R-   (    (    R   t   do_plaintext±  s    c         C   s   d  S(   N(    (   R	   t   tagR-   (    (    R   t   unknown_starttag·  s    c         C   s   d  S(   N(    (   R	   R¦   (    (    R   t   unknown_endtagº  s    (\   R   R   R   t   htmlentitydefst
   entitydefsR   R   R   R   R   R!   R&   R(   R,   R.   R/   R0   R1   R2   R3   R4   R5   R8   R9   R:   R;   R<   R@   RB   RD   RE   RG   RH   RJ   RK   RM   RN   RP   RQ   RR   RT   RV   RY   RZ   R\   R]   R^   R_   Rb   Rd   Rg   Rh   Rn   Rq   Rr   Rs   Rt   Ru   Rv   Rx   Rz   R{   R}   Ry   R€   R‚   R„   R†   R‡   Rˆ   R‰   RŠ   R‹   RŒ   RŽ   R   R‘   R’   R   R   R   R   Rƒ   R…   R—   R˜   Rš   Rœ   R£   R¥   R§   R¨   (    (    (    R   R       s²    
				
																																													
																																				c   	      C   sJ  d  k  } d  k } |  p | i d }  n |  o |  d d j } | o |  d =n |  o |  d } n d } | d j o | i } nF y t | d ƒ } Wn/ t	 j
 o# } | Gd G| GH| i d ƒ n X| i ƒ  } | | i j	 o | i ƒ  n | o | i ƒ  } n | i | i ƒ  ƒ } t | ƒ } | i | ƒ | i ƒ  d  S(   Ni   i    s   -ss	   test.htmlt   -t   rt   :(   t   sysR   R+   t   argvt   silentt   filet   stdint   ft   opent   IOErrort   msgt   exitt   readR   t   closet   NullFormattert   AbstractFormattert
   DumbWriterR    t   pt   feed(	   R+   R°   R³   R   R®   R½   R±   R¶   R   (    (    R   t   test¾  s2    t   __main__(   R   R   R   RS   t   __all__t   SGMLParseErrorR   R   R    R   R¿   R   (   R   R   RÁ   R    R¿   RS   (    (    R   t   ?   s   	ÿ ­'