;
Ïâ"Ic               @   s  d  Z  d d l Z d d l Z e j d ƒ Z e j d ƒ Z e j d ƒ Z e j d ƒ Z e j d ƒ Z e j d ƒ Z	 e j d	 ƒ Z
 e j d
 ƒ Z e j d ƒ Z e j d ƒ Z e j d e j ƒ Z e j d	 ƒ Z e j d ƒ Z Gd „  d e ƒ Z Gd „  d e j ƒ Z d S(   u   A parser for HTML and XHTML.i    Nu   [&<]u   <(/|\Z)u
   &[a-zA-Z#]u%   &([a-zA-Z][-.a-zA-Z0-9]*)[^a-zA-Z0-9]u)   &#(?:[0-9]+|[xX][0-9a-fA-F]+)[^0-9a-fA-F]u	   <[a-zA-Z]u   >u   --\s*>u   [a-zA-Z][-.a-zA-Z0-9:_]*u_   \s*([a-zA-Z_][-.:a-zA-Z_0-9]*)(\s*=\s*(\'[^\']*\'|"[^"]*"|[-a-zA-Z0-9./,:;+*%?!&$\(\)_#=~@]*))?uê  
  <[a-zA-Z][-.a-zA-Z0-9:_]*          # tag name
  (?:\s+                             # whitespace before attribute name
    (?:[a-zA-Z_][-.:a-zA-Z0-9_]*     # attribute name
      (?:\s*=\s*                     # value indicator
        (?:'[^']*'                   # LITA-enclosed value
          |\"[^\"]*\"                # LIT-enclosed value
          |[^'\">\s]+                # bare value
         )
       )?
     )
   )*
  \s*                                # trailing whitespace
u#   </\s*([a-zA-Z][-.a-zA-Z0-9:_]*)\s*>c             B   s)   |  Ee  Z d  Z d d „ Z d „  Z d S(   u&   Exception raised for all parse errors.c             C   s'   | |  _  | d |  _ | d |  _ d  S(   Ni    i   (   u   msgu   linenou   offset(   u   selfu   msgu   position(    (    u(   /mit/python/lib/python3.0/html/parser.pyu   __init__4   s    	c             C   s[   |  j  } |  j d  k	 o | d |  j } n |  j d  k	 o | d |  j d } n | S(   Nu   , at line %du   , column %di   (   u   msgu   linenou   Noneu   offset(   u   selfu   result(    (    u(   /mit/python/lib/python3.0/html/parser.pyu   __str__:   s    	N(   NN(   u   __name__u
   __module__u   __doc__u   Noneu   __init__u   __str__(   u
   __locals__(    (    u(   /mit/python/lib/python3.0/html/parser.pyu   HTMLParseError1   s   
u   HTMLParseErrorc             B   sþ   |  Ee  Z d  Z d Z d „  Z d „  Z d „  Z d „  Z d „  Z d Z
 d „  Z d	 „  Z d
 „  Z d „  Z d „  Z d „  Z d „  Z d „  Z d „  Z d „  Z d „  Z d „  Z d „  Z d „  Z d „  Z d „  Z d „  Z d „  Z d Z d „  Z d S(   uÇ  Find tags and other markup and call handler functions.

    Usage:
        p = HTMLParser()
        p.feed(data)
        ...
        p.close()

    Start tags are handled by calling self.handle_starttag() or
    self.handle_startendtag(); end tags by self.handle_endtag().  The
    data between tags is passed from the parser to the derived class
    by calling self.handle_data() with the data as argument (the data
    may be split up in arbitrary chunks).  Entity references are
    passed by calling self.handle_entityref() with the entity
    reference as the argument.  Numeric character references are
    passed to self.handle_charref() with the string containing the
    reference as the argument.
    u   scriptu   stylec             C   s   |  j  ƒ  d S(   u#   Initialize and reset this instance.N(   u   reset(   u   self(    (    u(   /mit/python/lib/python3.0/html/parser.pyu   __init__Z   s    c             C   s/   d |  _  d |  _ t |  _ t j j |  ƒ d S(   u1   Reset this instance.  Loses all unprocessed data.u    u   ???N(   u   rawdatau   lasttagu   interesting_normalu   interestingu   _markupbaseu
   ParserBaseu   reset(   u   self(    (    u(   /mit/python/lib/python3.0/html/parser.pyu   reset^   s    			c             C   s!   |  j  | |  _  |  j d ƒ d S(   u   Feed data to the parser.

        Call this as often as you want, with as little or as much text
        as you want (may include '
').
        i    N(   u   rawdatau   goahead(   u   selfu   data(    (    u(   /mit/python/lib/python3.0/html/parser.pyu   feede   s    c             C   s   |  j  d ƒ d S(   u   Handle any buffered data.i   N(   u   goahead(   u   self(    (    u(   /mit/python/lib/python3.0/html/parser.pyu   closen   s    c             C   s   t  | |  j ƒ  ƒ ‚ d  S(   N(   u   HTMLParseErroru   getpos(   u   selfu   message(    (    u(   /mit/python/lib/python3.0/html/parser.pyu   errorr   s    c             C   s   |  j  S(   u)   Return full source of start tag: '<...>'.(   u   _HTMLParser__starttag_text(   u   self(    (    u(   /mit/python/lib/python3.0/html/parser.pyu   get_starttag_textw   s    c             C   s   t  |  _ d  S(   N(   u   interesting_cdatau   interesting(   u   self(    (    u(   /mit/python/lib/python3.0/html/parser.pyu   set_cdata_mode{   s    c             C   s   t  |  _ d  S(   N(   u   interesting_normalu   interesting(   u   self(    (    u(   /mit/python/lib/python3.0/html/parser.pyu   clear_cdata_mode~   s    c       
      C   sò  |  j  } d } t | ƒ } x|| | k  on|  j j | | ƒ } | o | j ƒ  } n | } | | k  o |  j | | | … ƒ n |  j | | ƒ } | | k o Pn | j } | d | ƒ ot j	 | | ƒ o |  j
 | ƒ } nº | d | ƒ o |  j | ƒ } n— | d | ƒ o |  j | ƒ } nt | d | ƒ o |  j | ƒ } nQ | d | ƒ o |  j | ƒ } n. | d | k  o |  j d ƒ | d } n P| d k  o | o |  j d ƒ n Pn |  j | | ƒ } q | d	 | ƒ oˆ t j	 | | ƒ } | oj | j ƒ  d
 d … }	 |  j |	 ƒ | j ƒ  } | d | d ƒ p | d } n |  j | | ƒ } q q•Pq | d | ƒ ot j	 | | ƒ } | oc | j d ƒ }	 |  j |	 ƒ | j ƒ  } | d | d ƒ p | d } n |  j | | ƒ } q n t j	 | | ƒ } | o: | o. | j ƒ  | | d  … k o |  j d ƒ n Pq•| d | k  o' |  j d ƒ |  j | | d ƒ } q•Pq q W| o: | | k  o- |  j | | | … ƒ |  j | | ƒ } n | | d  … |  _  d  S(   Ni    u   <u   </u   <!--u   <?u   <!i   u   EOF in middle of constructu   &#i   iÿÿÿÿu   ;u   &u#   EOF in middle of entity or char ref(   u   rawdatau   lenu   interestingu   searchu   startu   handle_datau	   updateposu
   startswithu   starttagopenu   matchu   parse_starttagu   parse_endtagu   parse_commentu   parse_piu   parse_declarationu   erroru   charrefu   groupu   handle_charrefu   endu	   entityrefu   handle_entityrefu
   incomplete(
   u   selfu   endu   rawdatau   iu   nu   matchu   ju
   startswithu   ku   name(    (    u(   /mit/python/lib/python3.0/html/parser.pyu   goahead„   sŒ    	   	$c             C   sb   |  j  } t j | | d ƒ } | p d S| j ƒ  } |  j | | d | … ƒ | j ƒ  } | S(   Ni   iÿÿÿÿ(   u   rawdatau   picloseu   searchu   startu	   handle_piu   end(   u   selfu   iu   rawdatau   matchu   j(    (    u(   /mit/python/lib/python3.0/html/parser.pyu   parse_piÔ   s    	c             C   s°  d  |  _ |  j | ƒ } | d k  o | S|  j } | | | … |  _ g  } t j | | d ƒ } | j ƒ  } | | d | … j ƒ  |  _ } x| | k  oý t	 j | | ƒ } | p Pn | j
 d d d ƒ \ }	 }
 } |
 p
 d  } nˆ | d  d … d k o | d d  … k n p2 | d  d … d k o | d d  … k n o# | d d … } |  j | ƒ } n | j |	 j ƒ  | f ƒ | j ƒ  } q’ W| | | … j ƒ  } | d k oš |  j ƒ  \ } } d
 |  j k o9 | |  j j d
 ƒ } t |  j ƒ |  j j d
 ƒ } n | t |  j ƒ } |  j d | | | … d  d … f ƒ n | j d	 ƒ o |  j | | ƒ n/ |  j | | ƒ | |  j k o |  j ƒ  n | S(   Ni    i   i   i   u   'iÿÿÿÿu   "u   >u   />u   
u    junk characters in start tag: %ri   (   u   >u   />(   u   Noneu   _HTMLParser__starttag_textu   check_for_whole_start_tagu   rawdatau   tagfindu   matchu   endu   loweru   lasttagu   attrfindu   groupu   unescapeu   appendu   stripu   getposu   countu   lenu   rfindu   erroru   endswithu   handle_startendtagu   handle_starttagu   CDATA_CONTENT_ELEMENTSu   set_cdata_mode(   u   selfu   iu   endposu   rawdatau   attrsu   matchu   ku   tagu   mu   attrnameu   restu	   attrvalueu   endu   linenou   offset(    (    u(   /mit/python/lib/python3.0/html/parser.pyu   parse_starttagà   sP    		! 
22	#c             C   s  |  j  } t j | | ƒ } | oá | j ƒ  } | | | d … } | d k o	 | d S| d k oY | j d | ƒ o	 | d S| j d | ƒ o d S|  j | | d ƒ |  j d ƒ n | d k o d S| d	 k o d S|  j | | ƒ |  j d
 ƒ n t d ƒ ‚ d  S(   Ni   u   >u   /u   />i   iÿÿÿÿu   malformed empty start tagu    u6   abcdefghijklmnopqrstuvwxyz=/ABCDEFGHIJKLMNOPQRSTUVWXYZu   malformed start tagu   we should not get here!(   u   rawdatau   locatestarttagendu   matchu   endu
   startswithu	   updateposu   erroru   AssertionError(   u   selfu   iu   rawdatau   mu   ju   next(    (    u(   /mit/python/lib/python3.0/html/parser.pyu   check_for_whole_start_tag  s*    			c             C   s¢   |  j  } t j | | d ƒ } | p d S| j ƒ  } t j | | ƒ } | p" |  j d | | | … f ƒ n | j d ƒ } |  j | j	 ƒ  ƒ |  j
 ƒ  | S(   Ni   iÿÿÿÿu   bad end tag: %r(   u   rawdatau	   endendtagu   searchu   endu
   endtagfindu   matchu   erroru   groupu   handle_endtagu   loweru   clear_cdata_mode(   u   selfu   iu   rawdatau   matchu   ju   tag(    (    u(   /mit/python/lib/python3.0/html/parser.pyu   parse_endtag1  s    	"
c             C   s!   |  j  | | ƒ |  j | ƒ d  S(   N(   u   handle_starttagu   handle_endtag(   u   selfu   tagu   attrs(    (    u(   /mit/python/lib/python3.0/html/parser.pyu   handle_startendtagA  s    c             C   s   d  S(   N(    (   u   selfu   tagu   attrs(    (    u(   /mit/python/lib/python3.0/html/parser.pyu   handle_starttagF  s    c             C   s   d  S(   N(    (   u   selfu   tag(    (    u(   /mit/python/lib/python3.0/html/parser.pyu   handle_endtagJ  s    c             C   s   d  S(   N(    (   u   selfu   name(    (    u(   /mit/python/lib/python3.0/html/parser.pyu   handle_charrefN  s    c             C   s   d  S(   N(    (   u   selfu   name(    (    u(   /mit/python/lib/python3.0/html/parser.pyu   handle_entityrefR  s    c             C   s   d  S(   N(    (   u   selfu   data(    (    u(   /mit/python/lib/python3.0/html/parser.pyu   handle_dataV  s    c             C   s   d  S(   N(    (   u   selfu   data(    (    u(   /mit/python/lib/python3.0/html/parser.pyu   handle_commentZ  s    c             C   s   d  S(   N(    (   u   selfu   decl(    (    u(   /mit/python/lib/python3.0/html/parser.pyu   handle_decl^  s    c             C   s   d  S(   N(    (   u   selfu   data(    (    u(   /mit/python/lib/python3.0/html/parser.pyu	   handle_pib  s    c             C   s   |  j  d | f ƒ d  S(   Nu   unknown declaration: %r(   u   error(   u   selfu   data(    (    u(   /mit/python/lib/python3.0/html/parser.pyu   unknown_decle  s    c                s:   d | k o | S‡  f d †  } t  j d | | t  j ƒ S(   Nu   &c                s  |  j  ƒ  d }  |  d d k oU |  d d  … }  |  d d k o t |  d d  … d ƒ } n t |  ƒ } t | ƒ Sd d  l } t j d  k oK i d d 6} t _ x4 | j j j	 ƒ  D] \ } } t | ƒ | | <q¹ Wn y ˆ  j |  SWn  t
 k
 o d	 |  d
 SYn Xd  S(   Ni    u   #i   u   xu   Xi   u   'u   aposu   &u   ;(   u   xu   X(   u   groupsu   intu   chru   html.entitiesu
   HTMLParseru
   entitydefsu   Noneu   entitiesu   name2codepointu   itemsu   KeyError(   u   su   cu   htmlu
   entitydefsu   ku   v(   u   self(    u(   /mit/python/lib/python3.0/html/parser.pyu   replaceEntitiesm  s"     u#   &(#?[xX]?(?:[0-9a-fA-F]+|\w{1,8}));(   u   reu   subu   ASCII(   u   selfu   su   replaceEntities(    (   u   selfu(   /mit/python/lib/python3.0/html/parser.pyu   unescapej  s
    	N(   u   scriptu   style(   u   __name__u
   __module__u   __doc__u   CDATA_CONTENT_ELEMENTSu   __init__u   resetu   feedu   closeu   erroru   Noneu   _HTMLParser__starttag_textu   get_starttag_textu   set_cdata_modeu   clear_cdata_modeu   goaheadu   parse_piu   parse_starttagu   check_for_whole_start_tagu   parse_endtagu   handle_startendtagu   handle_starttagu   handle_endtagu   handle_charrefu   handle_entityrefu   handle_datau   handle_commentu   handle_declu	   handle_piu   unknown_declu
   entitydefsu   unescape(   u
   __locals__(    (    u(   /mit/python/lib/python3.0/html/parser.pyu
   HTMLParserC   s8   
										P		3												u
   HTMLParser(   u   __doc__u   _markupbaseu   reu   compileu   interesting_normalu   interesting_cdatau
   incompleteu	   entityrefu   charrefu   starttagopenu   picloseu   commentcloseu   tagfindu   attrfindu   VERBOSEu   locatestarttagendu	   endendtagu
   endtagfindu	   Exceptionu   HTMLParseErroru
   ParserBaseu
   HTMLParser(    (    (    u(   /mit/python/lib/python3.0/html/parser.pyu   <module>   s&   
	