*ë
r2û:c    L   sR    d  Z    d k Z  d k Z  d k l Z ! d Z " d Z # d Z % e i e i d ƒ ƒ Z	 & e i d d ƒ Z
 ' e i d e ƒ Z ( e i d e ƒ Z + d „  Z H e i d	 I e i d
 ƒ ƒ Z J e i d K e i e i ƒ  ƒ ƒ Z N d d d g Z O d d d d d d d d g Z R e e i e ƒ Z S e e i e ƒ Z U e i d V e i e i e Œ  ƒ e i e i e Œ  ƒ e i e	 e i d ƒ ƒ ƒ Z Z e i d [ e i d ƒ ƒ Z ^ d d d d d d  d! d d" d# d$ d% d& d' d( d) d* d+ g Z a e e i e ƒ Z b e i d, c e i e Œ  ƒ Z e e i d- f e i d. ƒ e	 e e	 e e	 e i d/ ƒ e	 e i e e	 ƒ e e	 e e i ƒ  ƒ Z  x e d0 d1 d2 ƒ Z! | e i d3 } e i d
 ƒ ƒ Z"  e i d4 € e i d5 ƒ e i e	 e i e" e i e i d ƒ ƒ ƒ e i ƒ  ƒ ƒ Z# ‰ e i d6 Š e i d7 ƒ ƒ Z$ ‹ e i d8 Œ e i d9 ƒ e	 e$ e i ƒ  ƒ Z% “ e i d: ” e i d; ƒ ƒ Z& – e i d< — e i d= ƒ ƒ Z' ™ e i d> š e i d? ƒ e	 e& e	 e i d@ ƒ e' e i ƒ  ƒ Z( ¥ e dA dB dC ƒ Z) ¨ e i dD © e i dE ƒ e i dF ƒ e i dG ƒ ƒ Z* ¬ e i dH ­ e i dI ƒ e* e i ƒ  ƒ Z+ ± e dJ dK dL ƒ Z, · e i dM ¸ e i- ƒ  ƒ Z. º e i dN » e i e	 e i- ƒ  ƒ ƒ Z/ ¾ e i dO ¿ e i dP ƒ e	 e. e/ ƒ Z0 Ê e i dQ Ë e i d= ƒ ƒ Z1 Ñ e i dR Ò e i dS ƒ e i dT ƒ e i dU ƒ ƒ Z2 Õ e i dV Ö e i dW ƒ e	 e1 e i e	 e2 ƒ e i ƒ  ƒ Z3 Ý e dX dY dZ ƒ Z4 Þ e d[ d\ d] ƒ Z5 ß e d^ d_ d` ƒ Z6 â e i da ã e i db ƒ e i dc ƒ e i ƒ  ƒ Z7 è e i dd é e i de ƒ e i df ƒ e i ƒ  ƒ Z8 î e dg dh di ƒ Z9 ñ e i dj ò e3 e4 e i e5 ƒ e6 e i e7 ƒ e i e8 ƒ e i e9 ƒ ƒ Z: û e dk dl dm ƒ Z; e i dn e i do ƒ e	 e i dp ƒ e i ƒ  ƒ Z< dq dr ds dt du dv dw dx dy dz d{ d| d} d~ d d€ d d‚ dƒ d„ d… d† d‡ dˆ d‰ d dŠ d‹ dŒ d dŽ d d d‘ d’ d“ d” d• d– d— d˜ d d™ dš d› dœ d dL d d( dž dŸ d  d¡ d d¢ d£ d¤ d¥ d¦ d§ d¨ d© dª d« d¬ fB Z= he e i e= ƒ Z> je i d­ ke i e> Œ  ƒ Z? te i d® ue i d¯ ƒ e i d° ƒ ƒ Z@ ‡‰e i d± Še i- d² ƒ e iA e e i d³ ƒ e i- d² ƒ ƒ ƒ ZB ‘e i d´ ’e e? eB ƒ ZC œdq dµ d¶ d· d¸ d¹ dº d» d¼ d½ d¾ d¿ dÀ dÁ dÂ dÃ dÄ dÅ dÆ dÇ dÈ dÉ dÊ dË dÌ dÍ d| dÎ dÏ dÐ dÑ dÒ dÓ dÔ dÕ dÖ d× dØ dÙ dÚ dÛ dM dÜ dÝ dÞ dß dà dá dâ dã dä då dæ dç dè dé dê dë dì dí dî dï dð dñ dò dó dô dõ dö d÷ dø d¡ dù dú dû dü fL ZD (e e i eD ƒ ZE *e i dý +e i e	 ƒ e i dþ ƒ e i eE Œ  e i e i dÿ ƒ ƒ ƒ ZF 8e i d 9e i- ƒ  e iA e e iG dþ ƒ e i dþ ƒ e i e	 ƒ e i dƒ e	 Be i- ƒ  ƒ ƒ ZH Ce i dDeF eH ƒ ZI Fe i dGeC e iA eI ƒ ƒ ZJ Le i dMe i dƒ ƒ ZK Ne i dOe i dƒ e	 eK e i ƒ  ƒ ZL Ve i dWe i d	ƒ e i- d
ƒ e i ƒ  Bƒ ZM [e i d\e i d= ƒ ƒ ZN ]e i d^e i- ƒ  ƒ ZO _e i d`e	 e i eN ƒ eO ƒ ZP de i deeM e i eP ƒ ƒ ZQ je i dke i dƒ e i e i ƒ  ƒ ƒ ZR ne i doe  e! e# e i e% ƒ e i e( ƒ e) e i e+ ƒ e, e0 e iA e: ƒ e i e; ƒ e< e i eJ ƒ eL eQ eR ƒ ZS €e iT deS e iU d. f ƒ ZV ‡e i dƒ ZW ”e iX d•eW e iY df –eS e iZ df —e[ e[ e[ ƒ
 Z\ še iT deS ›e iZ df ƒ Z] d S(  s‘  Martel based parser to read GenBank formatted files.

This is a huge regular regular expression for GenBank, built using
the 'regular expressiona on steroids' capabilities of Martel.

Notes:
Just so I remember -- the new end of line syntax is:
  New regexp syntax - \R
     \R    means "
|
?"
     [\R]  means "[
]"

This helps us have endlines be consistent across platforms.

Documentation for GenBank format that I found:

o GenBank/EMBL feature tables are described at:
http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html

o There are also descriptions of different GenBank lines at:
http://www.ibc.wustl.edu/standards/gbrel.txt
N(   s   RecordReaderi   i   i   s    i   c    s™   + ; < t  t |  ƒ } = t o | d j p
 t | ‚ ? t i | @ t i	 |  d | ƒ t i
 | ƒ t i t i	 d t  ƒ t i
 | ƒ ƒ ƒ Sd S(   s=  Define a Martel grouping which can parse a block of text.

    Many of the GenBank lines we'll want to process are grouped into
    a block like:

    IDENTIFIER   Blah blah blah

    Where blah blah blah can wrap for multiple lines. This function makes
    it easy to consistently define a definition for these blocks.

    Arguments:
    o identifier - The identifier that begins the block (like DEFINITION).
    o block_tag - A callback tag for the entire block.
    o block_data - A callback tag for the data in the block (ie. the
    stuff you are interested in).
    i    s    N(   s   INDENTs   lens
   identifiers   diffs	   __debug__s   AssertionErrors   Martels   Groups	   block_tags   Strs   ToEols
   block_datas   Rep(   s
   identifiers	   block_tags
   block_datas   diff(    (    s3   /mit/seven/lib/python/Bio/GenBank/genbank_format.pys   define_block+ s
   !s   locuss   [\w]+s   sizes   ss-s   ds-s   ms-s   DNAs   RNAs   mRNAs   tRNAs   rRNAs   uRNAs   snRNAs   PROTEINs   residue_types   circulars   dates   [-\w]+s   PRIs   RODs   MAMs   VRTs   INVs   PLNs   BCTs   VRLs   PHGs   SYNs   UNAs   ESTs   PATs   STSs   GSSs   HTGs   HTCs   data_file_divisions
   locus_lines   LOCUSs   bp|aas
   DEFINITIONs   definition_blocks
   definitions	   accessions   accession_blocks	   ACCESSIONs   nids   [\w\d]+s   nid_lines   NIDs   versions	   [\w\d\.]+s   gis   [\d]+s   version_lines   VERSIONs   GI:s   KEYWORDSs   keywords_blocks   keywordss   segments   segment_nums    of s   segment_totals   segment_lines   SEGMENT     s   SOURCEs   source_blocks   sources   organisms   taxonomys   organism_blocks
     ORGANISMs   reference_nums   reference_basess   (s   [;\w\d \R]+s   )s   reference_lines	   REFERENCEs	     AUTHORSs   authors_blocks   authorss     TITLEs   title_blocks   titles	     JOURNALs   journal_blocks   journals   medline_lines     MEDLINE   s
   medline_ids   pubmed_lines      PUBMED   s	   pubmed_ids     REMARKs   remark_blocks   remarks	   references   COMMENTs   comment_blocks   comments   features_lines   FEATURESs   Location/Qualifierss   alleles
   attenuators   C_regions   CAAT_signals   CDSs   conflicts   D-loops	   D_segments   enhancers   exons	   GC_signals   genes   iDNAs   introns	   J_segments   LTRs   mat_peptides   misc_bindings   misc_differences   misc_features   misc_recombs   misc_RNAs   misc_signals   misc_structures   modified_bases   mutations   N_regions   old_sequences   polyA_signals
   polyA_sites   precursor_RNAs   prim_transcripts   primer_binds   primers   promoters   protein_binds   RBSs
   rep_origins   repeat_regions   repeat_units   S_regions	   satellites   scRNAs   sig_peptides	   stem_loops   TATA_signals
   terminators   transit_peptides
   transposons   unsures   V_regions	   V_segments	   variations
   -10_signals
   -35_signals   3'clips   3'UTRs   5'clips   5'UTRs   -s   feature_keys   location_refs
   [_\d\w\.]+s   :s   locations   feature_locations   (?!/)s   feature_key_lines	   anticodons   bound_moietys	   cell_lines	   cell_types
   chromosomes   chloroplasts   chromoplasts   citations	   clone_libs   clones   codon_starts   codons   cons_splices   countrys   cultivars   cyanelles   db_xrefs	   dev_stages	   directions	   EC_numbers   evidences	   exceptions   focuss	   frequencys   functions   germlines	   haplotypes   insertion_seqs   isolates   kinetoplasts   labels   lab_hosts   macronuclears   maps   mitochondrions   mod_bases   notes   numbers	   organelles   partials   PCR_conditionss	   phenotypes   plasmids   pop_variants   products
   protein_ids   provirals   pseudos
   rearrangeds   replaces
   rpt_familys   rpt_types   rpt_units   sequenced_mols   serotypes   sexs   specific_hosts   specimen_vouchers   standard_names   strains	   sub_clones   sub_speciess
   sub_strains
   tissue_libs   tissue_types   translations   transl_excepts   transl_tables   types   usedins   varietys   virions   qualifier_keys   /s   =s   qualifier_values	   [\w\-\,]+s	   qualifiers   features
   base_counts   [\w\d ]+s   base_count_lines
   BASE COUNTs   origin_lines   ORIGINs   origin_names   base_numbers   sequences   sequence_lines   sequence_entrys
   record_ends   //s   genbank_records   genbank_files  (?P<filename>[^ ]+) +Genetic Sequence Data Bank
 *(?P<release_day>\d+) (?P<release_month>\w+) (?P<release_year>\d+)

 *(?P<data_bank_name>[^\R]+)

 *(?P<data_bank_name>[^\R]+)

 *(?P<num_loci>\d+) loci, *(?P<num_bases>\d+) bases, from *(?P<num_reports>\d+) reported sequences


s   genbanki
   (^   s   __doc__s   strings   Martels   RecordReaders   INDENTs   FEATURE_KEY_INDENTs   FEATURE_QUALIFIER_INDENTs   Rep1s   Strs   blank_spaces   small_indent_spaces   big_indent_spaces   qualifier_spaces   define_blocks   Groups   Res   locuss   Integers   sizes   valid_residue_prefixess   valid_residue_typess   maps   residue_prefixess   residue_typess   Opts   Alts   residue_types   dates   valid_divisionss	   divisionss   data_file_divisions   AnyEols
   locus_lines   definition_blocks	   accessions   accession_blocks   nids   nid_lines   versions   gis   version_lines   keywords_blocks   segments   segment_lines   source_blocks   ToEols   organisms   taxonomys   organism_blocks   reference_nums   reference_basess   reference_lines   authors_blocks   title_blocks   journal_blocks   medline_lines   pubmed_lines   remark_blocks	   references   comment_blocks   features_lines   feature_key_namess   valid_feature_keyss   feature_keys   location_refs   Reps   locations   feature_key_lines   feature_qualifier_namess   feature_qualifierss   qualifier_keys   AnyButs   qualifier_values	   qualifiers   features
   base_counts   base_count_lines   origin_lines   base_numbers   sequences   sequence_lines   sequence_entrys
   record_ends   records   ParseRecordss
   StartsWiths   record_formats   headers   HeaderFooters
   CountLiness   EndsWiths   Nones   formats   multirecord(K   s   journal_blocks   organisms   base_count_lines   sequence_entrys   qualifier_values   reference_nums   pubmed_lines   segment_lines   base_numbers   records   keywords_blocks   valid_residue_typess   features_lines   organism_blocks   residue_prefixess   definition_blocks   INDENTs	   divisionss   multirecords   feature_key_namess   residue_types   location_refs   dates   title_blocks   accession_blocks   qualifier_keys   feature_key_lines   nids   medline_lines   origin_lines   FEATURE_QUALIFIER_INDENTs   formats   remark_blocks   strings   locations   residue_typess   segments   valid_divisionss	   qualifiers   data_file_divisions   source_blocks   authors_blocks   sequences   nid_lines   features   comment_blocks   valid_feature_keyss
   locus_lines   FEATURE_KEY_INDENTs   Martels   sizes   gis   version_lines   reference_lines   sequence_lines   RecordReaders   blank_spaces   feature_qualifier_namess   qualifier_spaces   headers   feature_qualifierss   locuss   versions	   accessions   big_indent_spaces   define_blocks   taxonomys   record_formats   reference_basess	   references   feature_keys
   base_counts
   record_ends   valid_residue_prefixess   small_indent_space(    (    s3   /mit/seven/lib/python/Bio/GenBank/genbank_format.pys   ? sö   			!N?aO	'</#!/8,,H	0
Ï[	"<
íŒEa
',(~