*
D;c       s    d  Z  $ % d d d g Z + d k Z , d k Z - d k Z . d k Z / d k Z 2 d k l Z 5 d k	 Z	 6 d k	 l
 Z
 9 d k l Z : d k l Z ; d	 k l Z < d
 k l Z = d k l Z > d k l Z ? d k l Z @ d k l Z A d k l Z B d k l Z D d k Z E d k Z G d k l Z H d k l Z I d k  l! Z! K d f  d     YZ"  d f  d     YZ#  d e$ f d     YZ%  d e$ f d     YZ&  d f  d     YZ'  d f  d     YZ( d  f  d!     YZ) d" e f d#     YZ* yd$ e* f d%     YZ+ hd& e* f d'     YZ, d(   Z- d) f  d*     YZ. Vd+ d, g g  d- d.  Z/ d/ f  d0     YZ0 e1 d1  Z2 "d2 f  d3     YZ3 d4 d5 d6  Z4 e1 d4 d7 d5 e1 d8  Z5 d S(9   sU  Code to work with GenBank -- http://www.ncbi.nlm.nih.gov/

Classes:
Iterator              Iterate through a file of GenBank entries
Dictionary            Access a GenBank file using a dictionary interface.
ErrorFeatureParser    Catch errors caused during parsing.
FeatureParser         Parse GenBank data in Seq and SeqFeature objects.
RecordParser          Parse GenBank data into a Record object.
NCBIDictionary        Access GenBank using a dictionary interface.

_BaseGenBankConsumer  A base class for GenBank consumer that implements
                      some helpful functions that are in common between
                      consumers.
_FeatureConsumer      Create SeqFeature objects from info generated by
                      the Scanner
_RecordConsumer       Create a GenBank record object from Scanner info.
_PrintingConsumer     A debugging consumer.

_Scanner              Set up a Martel based GenBank parser to parse a record.

ParserFailureError    Exception indicating a failure in the parser (ie.
                      scanner or consumer)
LocationParserError   Exception indiciating a problem with the spark based
                      location parser.

Functions:
index_file            Get a GenBank file ready to be used as a Dictionary.
search_for            Do a query against GenBank.
download_many         Download many GenBank records.

s   LocationParsers   Records   genbank_formatN(   s   handler(   s   RecordReader(   s	   SeqRecord(   s   Alphabet(   s   IUPAC(   s   Seq(   s   File(   s   Index(   s   AbstractConsumer(   s   NCBI(   s   RequestLimiter(   s   EventGenerator(   s	   Reference(   s
   SeqFeature(   s   LocationParsers
   Dictionaryc      sV   K d  Z  M N d Z O e d  Z _ d   Z b d   Z u d   Z x d   Z RS(   sF   Allow a GenBank file to be accessed using a dictionary interface.
    s
   __filenamec    sG   O Z [ t  i  |  |  _ \ t |  i t i  |  _ ] | |  _	 d S(   s  Initialize and open up a GenBank dictionary.

        Arguments:
        o index_file - The name of the file that servers as the dictionary
        index. You need to use the index_file function in this module
        to creat this index.
        o parser - An optional argument specifying a parser object that
        the records should be run through before returning the output. If
        parser is None then the unprocessed contents of the file will be
        returned.
        N(
   s   Indexs
   index_files   selfs   _indexs   opens
   Dictionarys   _Dictionary__filename_keys   _handles   parsers   _parser(   s   selfs
   index_files   parser(    (    s-   /mit/seven/lib/python/Bio/GenBank/__init__.pys   __init__O s   c    s   _ ` t  |  i  Sd  S(   N(   s   lens   selfs   _index(   s   self(    (    s-   /mit/seven/lib/python/Bio/GenBank/__init__.pys   __len___ s   c    s   b d e d G|  i i   GHg |  i | \ } } h d G| Gd G| GHk |  i i |  l |  i i |  } m d G| GHp |  i
 t j	 o  q |  i
 i t i |   Sn s | Sd S(   s.   Retrieve an item from the dictionary.
        s   keys:s   start:s   len:s   data:N(   s   selfs   _indexs   keyss   keys   starts   lens   _handles   seeks   reads   datas   _parsers   Nones   parses   Files   StringHandle(   s   selfs   keys   lens   datas   start(    (    s-   /mit/seven/lib/python/Bio/GenBank/__init__.pys   __getitem__b s    c    s   u v t  |  i |  Sd  S(   N(   s   getattrs   selfs   _indexs   name(   s   selfs   name(    (    s-   /mit/seven/lib/python/Bio/GenBank/__init__.pys   __getattr__u s   c    s   x   |  i i   }  g  }  x; | d  r. }  | d  d j o  | i |  n q- W x# | d  r }  | i |  qn W | Sd S(   s  Provide valid keys from the index.

        If keys is just called on the index (using getattr) then
        the index will return internal values such as '__filename' which
        we just don't want to see. This just strips these values out
        before returning.
        i    i   s   __N(   s   selfs   _indexs   keyss   all_keyss   keys_to_removes   keys   appends   remove(   s   selfs   keys   keys_to_removes   all_keys(    (    s-   /mit/seven/lib/python/Bio/GenBank/__init__.pys   keysx s   	 	 	(   s   __doc__s   _Dictionary__filename_keys   Nones   __init__s   __len__s   __getitem__s   __getattr__s   keys(    (    (    s-   /mit/seven/lib/python/Bio/GenBank/__init__.pys
   DictionaryK s   		s   Iteratorc      s,    d  Z    e d d  Z  d   Z RS(   sM   Iterator interface to move over a file of GenBank entries one at a time.
    i    c    s      | o  | i   }  t o  | i d  d j p
 t d   x?  d o4  | i   }  | i d  d j o  Pn qO W | i    | i   n  t i	 | d  |  _  | |  _ d S(   s=  Initialize the iterator.

        Arguments:
        o handle - A handle with GenBank entries to iterate through.
        o parser - An optional parser to pass the entries through before
        returning them. If None, then the raw entry will be returned.
        o has_header - Whether or not the file to iterate over has one of
        those GenBank headers (ie. if you downloaded it directly from
        GenBank). If so, we'll iterate over the header to get past it, and
        then the iterator will be set up to return the first record in
        the file.
        s   Genetic Sequence Data Banki    s&   Doesn't seem to have a GenBank header.i   s   reported sequencess   LOCUSN(   s
   has_headers   handles   readlines
   first_lines	   __debug__s   finds   AssertionErrors   cur_lines   RecordReaders
   StartsWiths   selfs   _readers   parsers   _parser(   s   selfs   handles   parsers
   has_headers   cur_lines
   first_line(    (    s-   /mit/seven/lib/python/Bio/GenBank/__init__.pys   __init__ s   
* 
c    sd      |  i i   }  |  i t j	 o.  | o   |  i i t i |   Sn n  | Sd S(   sl   Return the next GenBank record from the handle.

        Will return None if we ran out of records.
        N(	   s   selfs   _readers   nexts   datas   _parsers   Nones   parses   Files   StringHandle(   s   selfs   data(    (    s-   /mit/seven/lib/python/Bio/GenBank/__init__.pys   next s   
$(   s   __doc__s   Nones   __init__s   next(    (    (    s-   /mit/seven/lib/python/Bio/GenBank/__init__.pys   Iterator s   	s   ParserFailureErrorc      s    d  Z    RS(   s:   Failure caused by some kind of problem in the parser.
    (   s   __doc__(    (    (    s-   /mit/seven/lib/python/Bio/GenBank/__init__.pys   ParserFailureError s   	s   LocationParserErrorc      s    d  Z    RS(   sA   Could not Properly parse out a location from a GenBank file.
    (   s   __doc__(    (    (    s-   /mit/seven/lib/python/Bio/GenBank/__init__.pys   LocationParserError s   	s   ErrorParserc      s)    d  Z    e d  Z  d   Z RS(   s1  Parse GenBank files and attempt to catch errors.

    This is just a small wrapper class around a passed parser which
    catches errors that occur in the parser.

    When errors occur, we'll raise a ParserFailureError, which returns
    (hopefully helpful) information about where the error occured
    c    s"      | |  _  | |  _ d S(   s   Initialize an ErrorFeatureParser.

        Arguments:
        o parser - The actual parser to use in parsing the records.
        o bad_file_handle - A handle to write problem GenBank files to
        If None, the files will not be saved.
        N(   s   bad_file_handles   selfs   _bad_file_handles   parsers   _parser(   s   selfs   parsers   bad_file_handle(    (    s-   /mit/seven/lib/python/Bio/GenBank/__init__.pys   __init__ s   c    s      | i   }  y   |  i i t i |   SWn  t i	 i
 t i	 i f j
 oJ  |  i o  |  i i |  n  t d |  i i i i   n>  t j
 o. }  t d | |  i i i i f   n Xd S(   s$   Parse the specified handle.
        s   Could not parse record %ss(   Could not parse location %s in record %sN(   s   handles   reads   records   selfs   _parsers   parses   Files   StringHandles   Martels   Parsers   ParserExceptions   ParserPositionExceptions   _bad_file_handles   writes   ParserFailureErrors	   _consumers   datas   ids   LocationParserErrors   msg(   s   selfs   handles   msgs   record(    (    s-   /mit/seven/lib/python/Bio/GenBank/__init__.pys   parse s    ##(   s   __doc__s   Nones   __init__s   parse(    (    (    s-   /mit/seven/lib/python/Bio/GenBank/__init__.pys   ErrorParser s   	s   FeatureParserc      s,    d  Z    d d d  Z  d   Z RS(   s4   Parse GenBank files into Seq + Feature objects.
    i    i   c    s(      t  |  |  _  | |  _ d S(   s  Initialize a GenBank parser and Feature consumer.

        Arguments:
        o debug_level - An optional argument that species the amount of
        debugging information Martel should spit out. By default we have
        no debugging info (the fastest way to do things), but if you want
        you can set this as high as two and see exactly where a parse fails.
        o use_fuzziness - Specify whether or not to use fuzzy representations.
        The default is 1 (use fuzziness).
        N(   s   _Scanners   debug_levels   selfs   _scanners   use_fuzziness(   s   selfs   debug_levels   use_fuzziness(    (    s-   /mit/seven/lib/python/Bio/GenBank/__init__.pys   __init__ s   
c    sE    t  |  i  |  _ |  i i | |  i  |  i i Sd S(   s$   Parse the specified handle.
        N(   s   _FeatureConsumers   selfs   use_fuzzinesss	   _consumers   _scanners   feeds   handles   data(   s   selfs   handle(    (    s-   /mit/seven/lib/python/Bio/GenBank/__init__.pys   parse s   (   s   __doc__s   __init__s   parse(    (    (    s-   /mit/seven/lib/python/Bio/GenBank/__init__.pys   FeatureParser s   	s   RecordParserc      s)   d  Z  	
d d  Z d   Z RS(   s,   Parse GenBank files into Record objects
    i    c    s   
t  |  |  _ d S(   s[  Initialize the parser.

        Arguments:
        o debug_level - An optional argument that species the amount of
        debugging information Martel should spit out. By default we have
        no debugging info (the fastest way to do things), but if you want
        you can set this as high as two and see exactly where a parse fails.
        N(   s   _Scanners   debug_levels   selfs   _scanner(   s   selfs   debug_level(    (    s-   /mit/seven/lib/python/Bio/GenBank/__init__.pys   __init__
s   c    s?   t    |  _ |  i i | |  i  |  i i Sd S(   s:   Parse the specified handle into a GenBank record.
        N(   s   _RecordConsumers   selfs	   _consumers   _scanners   feeds   handles   data(   s   selfs   handle(    (    s-   /mit/seven/lib/python/Bio/GenBank/__init__.pys   parses   (   s   __doc__s   __init__s   parse(    (    (    s-   /mit/seven/lib/python/Bio/GenBank/__init__.pys   RecordParsers   	s   _BaseGenBankConsumerc      sz   d  Z  !&d g Z (d   Z +d   Z 8d   Z Cd   Z Ud   Z ad   Z kd   Z td	   Z	 RS(
   s   Abstract GenBank consumer providing useful general functions.

    This just helps to eliminate some duplication in things that most
    GenBank consumers want to do.
    s   translationc    s
   ()d  S(   N(    (   s   self(    (    s-   /mit/seven/lib/python/Bio/GenBank/__init__.pys   __init__(s   c    sk   +-/| d d j o 0| d  } n
 2| } 3t i | d  } 4t t i |  } 6| Sd S(   s;   Split a string of keywords into a nice clean list.
        i   s   .s   ;N(   s   keyword_strings   keywordss   strings   splits   keyword_lists   maps   strips   clean_keyword_list(   s   selfs   keyword_strings   clean_keyword_lists   keyword_lists   keywords(    (    s-   /mit/seven/lib/python/Bio/GenBank/__init__.pys   _split_keywords+s   	c    sS   8:<t  i | d d  } >t  i | d  } ?t t  i |  } A| Sd S(   s9   Split a string of accession numbers into a list.
        s   
s    N(	   s   strings   replaces   accession_strings	   accessions   splits   accession_lists   maps   strips   clean_accession_list(   s   selfs   accession_strings	   accessions   accession_lists   clean_accession_list(    (    s-   /mit/seven/lib/python/Bio/GenBank/__init__.pys   _split_accessions8s
   c    s   CEF| d d j o G| d  } n
 I| } Jt i | d  } Kg  } Lx5 | d Lr( } M| i d  } N| i |  q` WOx% Od | j o P| i	 d  q WQt
 t i |  } S| Sd S(   s7   Split a string with taxonomy info into a list.
        i   s   .s   ;i    s   
s    N(   s   taxonomy_strings   tax_infos   strings   splits   tax_lists   new_tax_lists   tax_items	   new_itemss   extends   removes   maps   strips   clean_tax_list(   s   selfs   taxonomy_strings   tax_items   tax_lists	   new_itemss   new_tax_lists   tax_infos   clean_tax_list(    (    s-   /mit/seven/lib/python/Bio/GenBank/__init__.pys   _split_taxonomyCs   		 	 c    sN   UZ[| } \x. t i d \r } ]t i | | d  } q W_| Sd S(   s   Clean whitespace out of a location string.

        The location parser isn't a fan of whitespace, so we clean it out
        before feeding it into the parser.
        i    s    N(   s   location_strings   location_lines   strings
   whitespaces   wss   replace(   s   selfs   location_strings   wss   location_line(    (    s-   /mit/seven/lib/python/Bio/GenBank/__init__.pys   _clean_locationUs   	 	c    sN   aced d g } fx( | d fr } g| i | d  } q! Wi| Sd S(   sJ   Remove any newlines in the passed text, returning the new string.
        s   
s   i    s    N(   s   newliness   wss   texts   replace(   s   selfs   texts   newliness   ws(    (    s-   /mit/seven/lib/python/Bio/GenBank/__init__.pys   _remove_newlinesas    	c    sW   kmo| i d  } px% pd | j o q| i d  q Wrt i |  Sd S(   sG   Replace multiple spaces in the passed text with single spaces.
        s    s    N(   s   texts   splits
   text_partss   removes   strings   join(   s   selfs   texts
   text_parts(    (    s-   /mit/seven/lib/python/Bio/GenBank/__init__.pys   _normalize_spacesks    c    s   tvw| i d d  Sd S(   s0   Remove all spaces from the passed text.
        s    s    N(   s   texts   replace(   s   selfs   text(    (    s-   /mit/seven/lib/python/Bio/GenBank/__init__.pys   _remove_spacests   (
   s   __doc__s   remove_space_keyss   __init__s   _split_keywordss   _split_accessionss   _split_taxonomys   _clean_locations   _remove_newliness   _normalize_spacess   _remove_spaces(    (    (    s-   /mit/seven/lib/python/Bio/GenBank/__init__.pys   _BaseGenBankConsumers   	
	s   _FeatureConsumerc      s  yd  Z  d   Z d   Z d   Z d   Z d   Z d   Z d   Z d   Z d	   Z	 d
   Z
 d   Z d   Z d   Z d   Z d   Z d   Z d   Z d   Z 	d   Z d   Z d   Z d   Z d   Z d   Z d   Z d   Z #d   Z +d   Z ;d   Z Hd   Z md   Z d    Z  d!   Z! d"   Z" d#   Z# d$   Z$ d%   Z% (d&   Z& 6d'   Z' <d(   Z( ?d)   Z) Bd*   Z* Ed+   Z+ Md,   Z, RS(-   s   Create a SeqRecord object with Features to return.

    Attributes:
    o use_fuzziness - specify whether or not to parse with fuzziness in
    feature locations.
    c    s   t  i |   t t d t |  _ | |  _ d |  _ d |  _	 t |  _
 t |  _ t |  _ t |  _ d  S(   Ns   ids    (   s   _BaseGenBankConsumers   __init__s   selfs	   SeqRecords   Nones   datas   use_fuzzinesss   _use_fuzzinesss	   _seq_types	   _seq_datas   _current_refs   _cur_features   _cur_qualifier_keys   _cur_qualifier_value(   s   selfs   use_fuzziness(    (    s-   /mit/seven/lib/python/Bio/GenBank/__init__.pys   __init__s   c    s   | |  i _ d S(   s?   Set the locus name is set as the name of the Sequence.
        N(   s
   locus_names   selfs   datas   name(   s   selfs
   locus_name(    (    s-   /mit/seven/lib/python/Bio/GenBank/__init__.pys   locuss   c    s
   d  S(   N(    (   s   selfs   content(    (    s-   /mit/seven/lib/python/Bio/GenBank/__init__.pys   sizes   c    s   | |  _ d S(   sK   Record the sequence type so we can choose an appropriate alphabet.
        N(   s   types   selfs	   _seq_type(   s   selfs   type(    (    s-   /mit/seven/lib/python/Bio/GenBank/__init__.pys   residue_types   c    s   | |  i i d <d  S(   Ns   data_file_division(   s   divisions   selfs   datas   annotations(   s   selfs   division(    (    s-   /mit/seven/lib/python/Bio/GenBank/__init__.pys   data_file_divisions   c    s   | |  i i d <d  S(   Ns   date(   s   submit_dates   selfs   datas   annotations(   s   selfs   submit_date(    (    s-   /mit/seven/lib/python/Bio/GenBank/__init__.pys   dates   c    s   | |  i _ d S(   s?   Set the definition as the description of the sequence.
        N(   s
   definitions   selfs   datas   description(   s   selfs
   definition(    (    s-   /mit/seven/lib/python/Bio/GenBank/__init__.pys
   definitions   c    sc   |  i |  } |  i i t j o1 t |  d j o | d |  i _ n n d S(   s   Set the accession number as the id of the sequence.

        If we have multiple accession numbers, the first one passed is
        used.
        i    N(   s   selfs   _split_accessionss   acc_nums   new_acc_numss   datas   ids   Nones   len(   s   selfs   acc_nums   new_acc_nums(    (    s-   /mit/seven/lib/python/Bio/GenBank/__init__.pys	   accessions
   c    s   | |  i i d <d  S(   Ns   nid(   s   contents   selfs   datas   annotations(   s   selfs   content(    (    s-   /mit/seven/lib/python/Bio/GenBank/__init__.pys   nids   c    s   | |  i _ d S(   s   Set the version to overwrite the id.

        Since the verison provides the same information as the accession
        number, plus some extra info, we set this as the id if we have
        a version.
        N(   s
   version_ids   selfs   datas   id(   s   selfs
   version_id(    (    s-   /mit/seven/lib/python/Bio/GenBank/__init__.pys   versions   c    s   | |  i i d <d  S(   Ns   gi(   s   contents   selfs   datas   annotations(   s   selfs   content(    (    s-   /mit/seven/lib/python/Bio/GenBank/__init__.pys   gis   c    s#   |  i |  |  i i d <d  S(   Ns   keywords(   s   selfs   _split_keywordss   contents   datas   annotations(   s   selfs   content(    (    s-   /mit/seven/lib/python/Bio/GenBank/__init__.pys   keywordss   c    s   | |  i i d <d  S(   Ns   segment(   s   contents   selfs   datas   annotations(   s   selfs   content(    (    s-   /mit/seven/lib/python/Bio/GenBank/__init__.pys   segments   c    sJ   | d d j o | d  } n
 | } | |  i i d <d  S(   Ni   s   .s   source(   s   contents   source_infos   selfs   datas   annotations(   s   selfs   contents   source_info(    (    s-   /mit/seven/lib/python/Bio/GenBank/__init__.pys   sources   	c    s   | |  i i d <d  S(   Ns   organism(   s   contents   selfs   datas   annotations(   s   selfs   content(    (    s-   /mit/seven/lib/python/Bio/GenBank/__init__.pys   organisms   c    s#   |  i |  |  i i d <d  S(   Ns   taxonomy(   s   selfs   _split_taxonomys   contents   datas   annotations(   s   selfs   content(    (    s-   /mit/seven/lib/python/Bio/GenBank/__init__.pys   taxonomys   c    s`   |  i t j	 o! |  i i d i |  i  n g  |  i i d <t   |  _ d S(   s8   Signal the beginning of a new reference object.
        s
   referencesN(   s   selfs   _current_refs   Nones   datas   annotationss   appends	   Reference(   s   selfs   content(    (    s-   /mit/seven/lib/python/Bio/GenBank/__init__.pys   reference_nums
   !c 	   sV  | d d !} g  } t i | d  d j o t i | d  d j o | d } t i | d  } xt | d rg } t i | d  \ } } t
 i t t i |   t t i |    } | i |  q WnM | d j p t i |  d j o n  t d | |  i i f   | |  i _ d	 S(
   s   Attempt to determine the sequence region the reference entails.

        Possible types of information we may have to deal with:
        
        (bases 1 to 86436)
        (sites)
        (bases 1 to 105654; 110423 to 111122)
        i   s   basess   toi   s   ;i    s   sitess)   Could not parse base info %s in record %sN(   s   contents   ref_base_infos   all_locationss   strings   finds   splits   all_base_infos	   base_infos   starts   ends
   SeqFeatures   FeatureLocations   ints   strips   this_locations   appends
   ValueErrors   selfs   datas   ids   _current_refs   location(	   s   selfs   contents   ref_base_infos   this_locations   starts   all_locationss   all_base_infos   ends	   base_info(    (    s-   /mit/seven/lib/python/Bio/GenBank/__init__.pys   reference_basess    	7 	&c    s   	
| |  i _ d  S(   N(   s   contents   selfs   _current_refs   authors(   s   selfs   content(    (    s-   /mit/seven/lib/python/Bio/GenBank/__init__.pys   authors	s   c    s   | |  i _ d  S(   N(   s   contents   selfs   _current_refs   title(   s   selfs   content(    (    s-   /mit/seven/lib/python/Bio/GenBank/__init__.pys   titles   c    s   | |  i _ d  S(   N(   s   contents   selfs   _current_refs   journal(   s   selfs   content(    (    s-   /mit/seven/lib/python/Bio/GenBank/__init__.pys   journals   c    s   | |  i _ d  S(   N(   s   contents   selfs   _current_refs
   medline_id(   s   selfs   content(    (    s-   /mit/seven/lib/python/Bio/GenBank/__init__.pys
   medline_ids   c    s   | |  i _ d  S(   N(   s   contents   selfs   _current_refs	   pubmed_id(   s   selfs   content(    (    s-   /mit/seven/lib/python/Bio/GenBank/__init__.pys	   pubmed_ids   c    s   | |  i _ d  S(   N(   s   contents   selfs   _current_refs   comment(   s   selfs   content(    (    s-   /mit/seven/lib/python/Bio/GenBank/__init__.pys   remarks   c    s&   t  i | d  |  i i d <d  S(   Ns   
s   comment(   s   strings   joins   contents   selfs   datas   annotations(   s   selfs   content(    (    s-   /mit/seven/lib/python/Bio/GenBank/__init__.pys   comments   c    s    !|  i   d S(   sH   Get ready for the feature table when we reach the FEATURE line.
        N(   s   selfs   start_feature_table(   s   selfs   content(    (    s-   /mit/seven/lib/python/Bio/GenBank/__init__.pys   features_lines   c    sJ   #%'|  i t j	 o- (|  i i d i |  i  )t |  _ n d S(   s>   Indicate we've got to the start of the feature table.
        s
   referencesN(   s   selfs   _current_refs   Nones   datas   annotationss   append(   s   self(    (    s-   /mit/seven/lib/python/Bio/GenBank/__init__.pys   start_feature_table#s   c    sY   +12|  i oB 5|  i   7d |  _ 8d |  _ 9|  i i i |  i  n d S(   s   Utility function to add a feature to the SeqRecord.

        This does all of the appropriate checking to make sure we haven't
        left any info behind, and that we are only adding info if it
        exists.
        s    N(   s   selfs   _cur_features   _add_qualifiers   _cur_qualifier_keys   _cur_qualifier_values   datas   featuress   append(   s   self(    (    s-   /mit/seven/lib/python/Bio/GenBank/__init__.pys   _add_feature+s   c    s[   ;=|  i   @t i   |  _ A| |  i _ E|  i d j o Fd |  i _ n d  S(   Ns   DNAi   (   s   selfs   _add_features
   SeqFeatures   _cur_features   contents   types	   _seq_types   strand(   s   selfs   content(    (    s-   /mit/seven/lib/python/Bio/GenBank/__init__.pys   feature_key;s
   c    s   HNU|  i |  } \t i | d  d j o) ]t i | d  } ^| d | !} n ay bt i t i	 |   } Wn% et j
 o ft |   n Xk|  i | |  i  d S(   s   Parse out location information from the location string.

        This uses Andrew's nice spark based parser to do the parsing
        for us, and translates the results of the parse into appropriate
        Location objects.
        s   replacei   s   ,i   N(   s   selfs   _clean_locations   contents   location_lines   strings   finds	   comma_poss   LocationParsers   parses   scans
   parse_infos
   SystemExits   LocationParserErrors   _set_location_infos   _cur_feature(   s   selfs   contents
   parse_infos   location_lines	   comma_pos(    (    s-   /mit/seven/lib/python/Bio/GenBank/__init__.pys   locationHs   c    s   mwxt  o! t | t i  p t d |  {| i d j o@ }d | _ x) | i	 d r } |  i | |  q` Wna | i d j p | i d j p | i d j o |  i | |  n t d | i   d	 S(
   sT  Set the location information based on a function.

        This handles all of the location functions like 'join', 'complement'
        and 'order'.

        Arguments:
        o function - A LocationParser.Function object specifying the
        function we are acting on.
        o cur_feature - The feature to add information to.
        s"   Expected a Function object, got %ss
   complementi   i    s   joins   orders   one-ofs   Unexpected function name: %sN(   s	   __debug__s
   isinstances   functions   LocationParsers   Functions   AssertionErrors   names   cur_features   strands   argss
   inner_infos   selfs   _set_location_infos   _set_ordering_infos
   ValueError(   s   selfs   functions   cur_features
   inner_info(    (    s-   /mit/seven/lib/python/Bio/GenBank/__init__.pys   _set_functionms   
+ 	3c    s   x | i d r } t i   } | i d | i | _ | i | _ | i	 | _	 | i
 | _
 |  i | |  | i i |  q W| i d i i } | i d i i } t i | |  | _ d S(   s   Parse a join or order and all of the information in it.

        This deals with functions that order a bunch of locations,
        specifically 'join' and 'order'. The inner locations are
        added as subfeatures of the top level feature
        i    s   _i   N(   s   functions   argss   inner_elements
   SeqFeatures   new_sub_features   cur_features   types   names   refs   ref_dbs   strands   selfs   _set_location_infos   sub_featuress   appends   locations   starts   feature_starts   ends   feature_ends   FeatureLocation(   s   selfs   functions   cur_features   new_sub_features   feature_ends   inner_elements   feature_start(    (    s-   /mit/seven/lib/python/Bio/GenBank/__init__.pys   _set_ordering_infos    	c    s   | t j o d Snu t | t i  o |  i | |  d SnA t | t i  o |  i	 | |  n t
 d |   d S(   s   Set the location information for a feature from the parse info.

        Arguments:
        o parse_info - The classes generated by the LocationParser.
        o cur_feature - The feature to add the information to.
        Ns!   Could not parse location info: %s(   s
   parse_infos   Nones
   isinstances   LocationParsers   AbsoluteLocations   selfs   _set_locations   cur_features   Functions   _set_functions
   ValueError(   s   selfs
   parse_infos   cur_feature(    (    s-   /mit/seven/lib/python/Bio/GenBank/__init__.pys   _set_location_infos   c    s]   | i t j	 o( | i i | _ | i i | _ n |  i	 | i
  | _  d S(   s   Set the location information for a feature.

        Arguments:
        o location - An AbsoluteLocation object specifying the info
        about the location.
        o cur_feature - The feature to add the information to.
        N(   s   locations   paths   Nones	   accessions   cur_features   refs   databases   ref_dbs   selfs   _get_locations   local_location(   s   selfs   locations   cur_feature(    (    s-   /mit/seven/lib/python/Bio/GenBank/__init__.pys   _set_locations
   c    s   t  | t i  o) |  i |  } t i | |  Sn> |  i | i	  } |  i | i  } t i | |  Sd S(   sn  Return a (possibly fuzzy) location from a Range object.

        Arguments:
        o range_info - A location range (ie. something like 67..100). This
        may also be a single position (ie 27).

        This returns a FeatureLocation object.
        If parser.use_fuzziness is set at one, the positions for the
        end points will possibly be fuzzy.
        N(   s
   isinstances
   range_infos   LocationParsers   Ranges   selfs   _get_positions   poss
   SeqFeatures   FeatureLocations   lows	   start_poss   highs   end_pos(   s   selfs
   range_infos   end_poss   poss	   start_pos(    (    s-   /mit/seven/lib/python/Bio/GenBank/__init__.pys   _get_locations   
c    sW  t  | t i  o t i | i  } n t  | t i  o t i	 | i
 i  } n t  | t i  o t i | i
 i  } n  t  | t i  o( t i | i i | i i  } nR t  | t i  o( t i | i i | i i  } n 	t d |   |  i o | Sn t i | i  Sd S(   s|  Return a (possibly fuzzy) position for a single coordinate.

        Arguments:
        o position - This is a LocationParser.* object that specifies
        a single coordinate. We will examine the object to determine
        the fuzziness of the position.

        This is used with _get_location to parse out a location of any
        end_point of arbitrary fuzziness.
        s#   Unexpected LocationParser object %rN(   s
   isinstances   positions   LocationParsers   Integers
   SeqFeatures   ExactPositions   vals	   final_poss   LowBounds   AfterPositions   bases	   HighBounds   BeforePositions   Betweens   BetweenPositions   lows   highs   TwoBounds   WithinPositions
   ValueErrors   selfs   _use_fuzzinesss   location(   s   selfs   positions	   final_pos(    (    s-   /mit/seven/lib/python/Bio/GenBank/__init__.pys   _get_positions"   
c    s   |  i ov |  i } d }  xA  |  i i i |  o' !|  i t |  } "| d } q. W%|  i |  i i | <n d S(   s  Add a qualifier to the current feature without loss of info.

        If there are multiple qualifier keys with the same name we
        would lose some info in the dictionary, so we append a unique
        number to the end of the name in case of conflicts.
        i   N(	   s   selfs   _cur_qualifier_keys   unique_names   counters   _cur_features
   qualifierss   has_keys   strs   _cur_qualifier_value(   s   selfs   counters   unique_name(    (    s-   /mit/seven/lib/python/Bio/GenBank/__init__.pys   _add_qualifiers   	 c    sq   (*,|  i   /t i | d d  } 0t i | d d  } 1t i |  } 3| |  _ 4d |  _ d S(   sA   When we get a qualifier key, use it as a dictionary key.
        s   /s    s   =N(	   s   selfs   _add_qualifiers   strings   replaces   contents   qual_keys   strips   _cur_qualifier_keys   _cur_qualifier_value(   s   selfs   contents   qual_key(    (    s-   /mit/seven/lib/python/Bio/GenBank/__init__.pys   qualifier_key(s   c    s+   68t  i | d d  } :| |  _ d  S(   Ns   "s    (   s   strings   replaces   contents
   qual_values   selfs   _cur_qualifier_value(   s   selfs   contents
   qual_value(    (    s-   /mit/seven/lib/python/Bio/GenBank/__init__.pys   qualifier_value6s   c    s
   <=d  S(   N(    (   s   selfs   content(    (    s-   /mit/seven/lib/python/Bio/GenBank/__init__.pys   origin_name<s   c    s
   ?@d  S(   N(    (   s   selfs   content(    (    s-   /mit/seven/lib/python/Bio/GenBank/__init__.pys
   base_count?s   c    s
   BCd  S(   N(    (   s   selfs   content(    (    s-   /mit/seven/lib/python/Bio/GenBank/__init__.pys   base_numberBs   c    sF   EGHt  i | d d  } It  i |  } K|  i | 7_ d S(   s2   Add up sequence information as we get it.
        s    s    N(   s   strings   replaces   contents   new_seqs   uppers   selfs	   _seq_data(   s   selfs   contents   new_seq(    (    s-   /mit/seven/lib/python/Bio/GenBank/__init__.pys   sequenceEs   c    s   MOQ|  i   Wt i } Y|  i o Zt i |  i d  d j o [t i } nj \t i |  i d  d j o ]t i	 } n: ^|  i d j o _t i
 } n bt d |  i   n ft |  i |  |  i _ d S(   s1   Clean up when we've finished the record.
        s   DNAi   s   RNAs   PROTEINs,   Could not determine alphabet for seq_type %sN(   s   selfs   _add_features   Alphabets   generic_alphabets   seq_alphabets	   _seq_types   strings   finds   generic_dnas   generic_rnas   generic_proteins
   ValueErrors   Seqs	   _seq_datas   datas   seq(   s   selfs   contents   seq_alphabet(    (    s-   /mit/seven/lib/python/Bio/GenBank/__init__.pys
   record_endMs     (-   s   __doc__s   __init__s   locuss   sizes   residue_types   data_file_divisions   dates
   definitions	   accessions   nids   versions   gis   keywordss   segments   sources   organisms   taxonomys   reference_nums   reference_basess   authorss   titles   journals
   medline_ids	   pubmed_ids   remarks   comments   features_lines   start_feature_tables   _add_features   feature_keys   locations   _set_functions   _set_ordering_infos   _set_location_infos   _set_locations   _get_locations   _get_positions   _add_qualifiers   qualifier_keys   qualifier_values   origin_names
   base_counts   base_numbers   sequences
   record_end(    (    (    s-   /mit/seven/lib/python/Bio/GenBank/__init__.pys   _FeatureConsumerysZ   		)% (s   _RecordConsumerc      s  hd  Z  jkd   Z sd   Z vd   Z yd   Z |d   Z d   Z d   Z d   Z d	   Z	 d
   Z
 d   Z d   Z d   Z d   Z d   Z d   Z d   Z d   Z d   Z d   Z d   Z d   Z d   Z d   Z d   Z d   Z d   Z d   Z d   Z d   Z d   Z d    Z  d!   Z! d"   Z" d#   Z# d$   Z$ RS(%   sG   Create a GenBank Record object from scanner generated information.
    c    sM   klt  i |   mt i   |  _ ot |  _ pt |  _ qt |  _ d  S(   N(	   s   _BaseGenBankConsumers   __init__s   selfs   Records   datas   Nones   _cur_references   _cur_features   _cur_qualifier(   s   self(    (    s-   /mit/seven/lib/python/Bio/GenBank/__init__.pys   __init__ks
   c    s   st| |  i _ d  S(   N(   s   contents   selfs   datas   locus(   s   selfs   content(    (    s-   /mit/seven/lib/python/Bio/GenBank/__init__.pys   locusss   c    s   vw| |  i _ d  S(   N(   s   contents   selfs   datas   size(   s   selfs   content(    (    s-   /mit/seven/lib/python/Bio/GenBank/__init__.pys   sizevs   c    s   yz| |  i _ d  S(   N(   s   contents   selfs   datas   residue_type(   s   selfs   content(    (    s-   /mit/seven/lib/python/Bio/GenBank/__init__.pys   residue_typeys   c    s   |}| |  i _ d  S(   N(   s   contents   selfs   datas   data_file_division(   s   selfs   content(    (    s-   /mit/seven/lib/python/Bio/GenBank/__init__.pys   data_file_division|s   c    s   | |  i _ d  S(   N(   s   contents   selfs   datas   date(   s   selfs   content(    (    s-   /mit/seven/lib/python/Bio/GenBank/__init__.pys   dates   c    s   | |  i _ d  S(   N(   s   contents   selfs   datas
   definition(   s   selfs   content(    (    s-   /mit/seven/lib/python/Bio/GenBank/__init__.pys
   definitions   c    s/   |  i |  } |  i i i |  d  S(   N(   s   selfs   _split_accessionss   contents   new_accessionss   datas	   accessions   extend(   s   selfs   contents   new_accessions(    (    s-   /mit/seven/lib/python/Bio/GenBank/__init__.pys	   accessions   c    s   | |  i _ d  S(   N(   s   contents   selfs   datas   nid(   s   selfs   content(    (    s-   /mit/seven/lib/python/Bio/GenBank/__init__.pys   nids   c    s   | |  i _ d  S(   N(   s   contents   selfs   datas   version(   s   selfs   content(    (    s-   /mit/seven/lib/python/Bio/GenBank/__init__.pys   versions   c    s   | |  i _ d  S(   N(   s   contents   selfs   datas   gi(   s   selfs   content(    (    s-   /mit/seven/lib/python/Bio/GenBank/__init__.pys   gis   c    s   |  i |  |  i _ d  S(   N(   s   selfs   _split_keywordss   contents   datas   keywords(   s   selfs   content(    (    s-   /mit/seven/lib/python/Bio/GenBank/__init__.pys   keywordss   c    s   | |  i _ d  S(   N(   s   contents   selfs   datas   segment(   s   selfs   content(    (    s-   /mit/seven/lib/python/Bio/GenBank/__init__.pys   segments   c    s   | |  i _ d  S(   N(   s   contents   selfs   datas   source(   s   selfs   content(    (    s-   /mit/seven/lib/python/Bio/GenBank/__init__.pys   sources   c    s   | |  i _ d  S(   N(   s   contents   selfs   datas   organism(   s   selfs   content(    (    s-   /mit/seven/lib/python/Bio/GenBank/__init__.pys   organisms   c    s   |  i |  |  i _ d  S(   N(   s   selfs   _split_taxonomys   contents   datas   taxonomy(   s   selfs   content(    (    s-   /mit/seven/lib/python/Bio/GenBank/__init__.pys   taxonomys   c    s[   |  i t j	 o |  i i i |  i  n t i   |  _ | |  i _	 d S(   sK   Grab the reference number and signal the start of a new reference.
        N(
   s   selfs   _cur_references   Nones   datas
   referencess   appends   Records	   References   contents   number(   s   selfs   content(    (    s-   /mit/seven/lib/python/Bio/GenBank/__init__.pys   reference_nums
   c    s   | |  i _ d  S(   N(   s   contents   selfs   _cur_references   bases(   s   selfs   content(    (    s-   /mit/seven/lib/python/Bio/GenBank/__init__.pys   reference_basess   c    s   | |  i _ d  S(   N(   s   contents   selfs   _cur_references   authors(   s   selfs   content(    (    s-   /mit/seven/lib/python/Bio/GenBank/__init__.pys   authorss   c    s   | |  i _ d  S(   N(   s   contents   selfs   _cur_references   title(   s   selfs   content(    (    s-   /mit/seven/lib/python/Bio/GenBank/__init__.pys   titles   c    s   | |  i _ d  S(   N(   s   contents   selfs   _cur_references   journal(   s   selfs   content(    (    s-   /mit/seven/lib/python/Bio/GenBank/__init__.pys   journals   c    s   | |  i _ d  S(   N(   s   contents   selfs   _cur_references
   medline_id(   s   selfs   content(    (    s-   /mit/seven/lib/python/Bio/GenBank/__init__.pys
   medline_ids   c    s   | |  i _ d  S(   N(   s   contents   selfs   _cur_references	   pubmed_id(   s   selfs   content(    (    s-   /mit/seven/lib/python/Bio/GenBank/__init__.pys	   pubmed_ids   c    s   | |  i _ d  S(   N(   s   contents   selfs   _cur_references   remark(   s   selfs   content(    (    s-   /mit/seven/lib/python/Bio/GenBank/__init__.pys   remarks   c    s"   t  i | d  |  i _ d  S(   Ns   
(   s   strings   joins   contents   selfs   datas   comment(   s   selfs   content(    (    s-   /mit/seven/lib/python/Bio/GenBank/__init__.pys   comments   c    s   |  i   d S(   sH   Get ready for the feature table when we reach the FEATURE line.
        N(   s   selfs   start_feature_table(   s   selfs   content(    (    s-   /mit/seven/lib/python/Bio/GenBank/__init__.pys   features_lines   c    s:   |  i t j	 o |  i i i |  i  n d S(   s/   Signal the start of the feature table.
        N(   s   selfs   _cur_references   Nones   datas
   referencess   append(   s   self(    (    s-   /mit/seven/lib/python/Bio/GenBank/__init__.pys   start_feature_tables   c    s8   |  i   t i   |  _ | |  i _ d S(   sK   Grab the key of the feature and signal the start of a new feature.
        N(   s   selfs   _add_features   Records   Features   _cur_features   contents   key(   s   selfs   content(    (    s-   /mit/seven/lib/python/Bio/GenBank/__init__.pys   feature_keys   c    sv   |  i t j	 oY |  i t j	 o |  i i i |  i  n t |  _ |  i i i |  i  n d S(   s   Utility function to add a feature to the Record.

        This does all of the appropriate checking to make sure we haven't
        left any info behind, and that we are only adding info if it
        exists.
        N(   s   selfs   _cur_features   Nones   _cur_qualifiers
   qualifierss   appends   datas   features(   s   self(    (    s-   /mit/seven/lib/python/Bio/GenBank/__init__.pys   _add_features   c    s   |  i |  |  i _ d  S(   N(   s   selfs   _clean_locations   contents   _cur_features   location(   s   selfs   content(    (    s-   /mit/seven/lib/python/Bio/GenBank/__init__.pys   locations   c    sX   |  i t j	 o |  i i i |  i  n t i   |  _ | |  i _	 d  S(   N(
   s   selfs   _cur_qualifiers   Nones   _cur_features
   qualifierss   appends   Records	   Qualifiers   contents   key(   s   selfs   content(    (    s-   /mit/seven/lib/python/Bio/GenBank/__init__.pys   qualifier_keys   c    s   |  i |  } xN |  i i d r; } |  i i i	 |  d j o |  i
 |  } n q' W|  i |  |  i _ d  S(   Ni    (   s   selfs   _remove_newliness   contents   cur_contents	   __class__s   remove_space_keyss   remove_space_keys   _cur_qualifiers   keys   finds   _remove_spacess   _normalize_spacess   value(   s   selfs   contents   remove_space_keys   cur_content(    (    s-   /mit/seven/lib/python/Bio/GenBank/__init__.pys   qualifier_values    	c    s   | |  i _ d  S(   N(   s   contents   selfs   datas   base_counts(   s   selfs   content(    (    s-   /mit/seven/lib/python/Bio/GenBank/__init__.pys
   base_counts   c    s    | |  i _ d  S(   N(   s   contents   selfs   datas   origin(   s   selfs   content(    (    s-   /mit/seven/lib/python/Bio/GenBank/__init__.pys   origin_names   c    s=   t  i | d d  } |  i i t  i |  7_ d  S(   Ns    s    (   s   strings   replaces   contents   new_seqs   selfs   datas   sequences   upper(   s   selfs   contents   new_seq(    (    s-   /mit/seven/lib/python/Bio/GenBank/__init__.pys   sequences   c    s   
|  i   d S(   sD   Signal the end of the record and do any necessary clean-up.
        N(   s   selfs   _add_feature(   s   selfs   content(    (    s-   /mit/seven/lib/python/Bio/GenBank/__init__.pys
   record_ends   (%   s   __doc__s   __init__s   locuss   sizes   residue_types   data_file_divisions   dates
   definitions	   accessions   nids   versions   gis   keywordss   segments   sources   organisms   taxonomys   reference_nums   reference_basess   authorss   titles   journals
   medline_ids	   pubmed_ids   remarks   comments   features_lines   start_feature_tables   feature_keys   _add_features   locations   qualifier_keys   qualifier_values
   base_counts   origin_names   sequences
   record_end(    (    (    s-   /mit/seven/lib/python/Bio/GenBank/__init__.pys   _RecordConsumerhsJ   	
		c    s2   t  t i |   } t i | d  Sd S(   sm  Combine multiple lines of content separated by spaces.

    This function is used by the EventGenerator callback function to
    combine multiple lines of information. The lines are first
    stripped to remove whitepsace, and then combined so they are separated
    by a space. This is a simple minded way to combine lines, but should
    work for most cases.
    s    N(   s   maps   strings   strips	   line_lists   stripped_line_lists   join(   s	   line_lists   stripped_line_list(    (    s-   /mit/seven/lib/python/Bio/GenBank/__init__.pys   _strip_and_combines   s   _Scannerc      s)   d  Z   !d d  Z Gd   Z RS(   s   Start up Martel to do the scanning of the file.

    This initialzes the Martel based parser and connects it to a handler
    that will generate events for a Feature Consumer.
    i    c  "  s   !,/d d d d d d d d d	 d
 d d d d d d d d d d d d d d d d d d d d d d  d! d" g" |  _ @d g |  _ Ct i t i D|  i  } E| i d# |  |  _
 d$ S(%   s  Initialize the scanner by setting up our caches.

        Creating the parser takes a long time, so we want to cache it
        to reduce parsing time.

        Arguments:
        o debug - The level of debugging that the parser should
        display. Level 0 is no debugging, Level 2 displays the most
        debugging info (but is much slower). See Martel documentation
        for more info on this.
        s   locuss   sizes   residue_types   data_file_divisions   dates
   definitions	   accessions   nids   versions   gis   keywordss   segments   sources   organisms   taxonomys   reference_nums   reference_basess   authorss   titles   journals
   medline_ids	   pubmed_ids   remarks   comments   features_lines   feature_keys   locations   qualifier_keys   qualifier_values   origin_names
   base_counts   base_numbers   sequences
   record_ends   debug_levelN(   s   selfs   interest_tagss   exempt_tagss   Martels   select_namess   genbank_formats   records
   expressions   make_parsers   debugs   _parser(   s   selfs   debugs
   expression(    (    s-   /mit/seven/lib/python/Bio/GenBank/__init__.pys   __init__!s   rc    sg   GMN|  i i t | O|  i Pt Q|  i   R|  i i t	 i
    T|  i i |  d S(   s   Feeed a set of data into the scanner.

        Arguments:
        o handle - A handle with the information to parse.
        o consumer - The consumer that should be informed of events.
        N(   s   selfs   _parsers   setContentHandlers   EventGenerators   consumers   interest_tagss   _strip_and_combines   exempt_tagss   setErrorHandlers   handlers   ErrorHandlers	   parseFiles   handle(   s   selfs   handles   consumer(    (    s-   /mit/seven/lib/python/Bio/GenBank/__init__.pys   feedGs   	(   s   __doc__s   __init__s   feed(    (    (    s-   /mit/seven/lib/python/Bio/GenBank/__init__.pys   _Scanners   	&s   locuss	   accessioni    c    s  Vz{y |d k  l } l } Wn) }t j
 o ~t d d   n X| o t i i t i i	 | |   ov | i | |  } | i i d  oG | i d |  } | t i i |   j o d GHd Sn n n n t i i |  o t i |  n | i | |  } | i |   | i | d | | |  } t i }	 |	 i   }
 |
 i! |  | i |   |
 i" t# |  d   d S(	   sK  Index a GenBank file into a database for quick loading.

    WARNING: This is very experimental and subject to change.
    It requires the use of Andrew Dalke's mindy.

    This is very similar to index_file, but uses a database instead
    of a flat file to store the information about the genbank_file.

    Arguments:

    o genbank_file - The GenBank formatted file that we want to index.

    o db_name - The name of the database to create. This name will allow you
    to retrieve the file later.

    o db_directory - The directory where the database information should be
    stored.

    o identifier - The primary identifier used to store records in the file
    under. This will be used for retrieving them later.

    o aliases - Secondary identifiers that point to the record. These can
    be used for searching if a primary identifier is not found. This is
    useful for GenBank since we'll index by a single identifier (the LOCUS
    identifier by default) but might want to search by some other
    identifier.

    o keywords - More advanced Mindy features that I'm not positive
    how to make full use of right now.

    o always_index - A flag indicating whether or not to index a file even
    if the file appears not to have changed. By default, the function will
    try to skip indexing if it thinks the file hasn't changed.
    (   s   mindy_indexs   mindy_searchs   You must have mindy installed:
s0   http://www.biopython.org/~dalke/mindy-0.1.tar.gzs
   file_sizess"   File already indexed. Skipping....Ns   genbank_records   rb($   s   mindys   mindy_indexs   mindy_searchs   ImportErrors
   SystemExits   always_indexs   oss   paths   existss   joins   db_directorys   db_names
   mindy_opens	   search_dbs
   mindy_datas   has_keys   genbank_files	   file_sizes   getsizes   makedirss   creates   mindy_dbs   use_filenames   SimpleIndexers
   identifiers   aliasess   keywordss   indexers   genbank_formats   record_formats	   gb_formats   make_parsers   parsers   setContentHandlers	   parseFiles   open(   s   genbank_files   db_names   db_directorys
   identifiers   aliasess   keywordss   always_indexs   mindy_dbs	   file_sizes	   gb_formats   parsers   indexers   mindy_indexs	   search_dbs   mindy_search(    (    s-   /mit/seven/lib/python/Bio/GenBank/__init__.pys   index_file_dbVs0   $%	s   MindyDictionaryc      sY   d  Z  e d  Z d   Z d   Z d   Z d   Z d   Z RS(   s1  Access a GenBank file using a dictionary interface, though a Mindy DB.

    WARNING: This is very experimental and subject to change.
    It requires the use of Andrew Dalke's mindy.

    This is the Dictionary interface to use after you create an index
    database using the function index_file_db.
    c    sq   y d k  l } Wn) t j
 o t d d   n X| i | |  |  _ | |  _
 d S(   s  Initialize and open up a GenBank dictionary.

        Arguments:
        
        o db_name - The name of the database we should retrieve information
        from.

        o db_directory - The location of the database specified in db_name.
        
        o parser - An optional argument specifying a parser object that
        the records should be run through before returning the output. If
        parser is None then the unprocessed contents of the file will be
        returned.
        (   s   mindy_searchs   You must have mindy installed:
s0   http://www.biopython.org/~dalke/mindy-0.1.tar.gzN(   s   mindys   mindy_searchs   ImportErrors
   SystemExits
   mindy_opens   db_directorys   db_names   selfs   _searchs   parsers   _parser(   s   selfs   db_names   db_directorys   parsers   mindy_search(    (    s-   /mit/seven/lib/python/Bio/GenBank/__init__.pys   __init__s   c    s   t  |  i i  Sd  S(   N(   s   lens   selfs   _searchs   identifiers(   s   self(    (    s-   /mit/seven/lib/python/Bio/GenBank/__init__.pys   __len__s   c    s  y |  i | } Wn t j
 o |  i i i | g   } t |  d j o t d |   nB t |  d j o t d |   n |  i | d } n X|  i	 t
 j	 o  |  i	 i t i |   Sn t i |  Sd S(   s  Retrieve an item from the indexed file.

        The key can be either a primary identifier or an alias. The lookup
        will first try to get the file via the primary identifier, and if
        it can't do this, will subsequently try to get it through the
        aliases to these keys. If the aliases are ambigous, an error will
        be raised.

        Most of the time I find it easiest to search by aliases (the GenBank
        accession numbers), but YMMV.
        i    s   No records found for key %si   s!   Multiple records found for key %sN(   s   selfs   _searchs   keys   datas   KeyErrors   aliasess   gets   idss   lens   _parsers   Nones   parses   Files   StringHandle(   s   selfs   keys   datas   ids(    (    s-   /mit/seven/lib/python/Bio/GenBank/__init__.pys   __getitem__s    c    s   t  |  i |  Sd  S(   N(   s   getattrs   selfs   _indexs   name(   s   selfs   name(    (    s-   /mit/seven/lib/python/Bio/GenBank/__init__.pys   __getattr__s   c    s   |  i i i   Sd S(   s:   Provide all identifiers for the current database.
        N(   s   selfs   _searchs   identifierss   keys(   s   self(    (    s-   /mit/seven/lib/python/Bio/GenBank/__init__.pys   keyss   c    s   |  i i i   Sd S(   s5   Provide all aliases in the current database.
        N(   s   selfs   _searchs   aliasess   keys(   s   self(    (    s-   /mit/seven/lib/python/Bio/GenBank/__init__.pys   aliasess   (   s   __doc__s   Nones   __init__s   __len__s   __getitem__s   __getattr__s   keyss   aliases(    (    (    s-   /mit/seven/lib/python/Bio/GenBank/__init__.pys   MindyDictionarys   	c 	   s  t  i i |   o  t d |    n t i | d d } |  | t i	 <t
 t |   d t   } x%d o| i i | i i } | i   } 	| i i | i i | } | t j o Pn | t j	 o | |  } n7 t | i  d j o | i d } n
 t } | t j o t d   n+ | i |  o t d |   n  | | f | | <q Wd S(	   s  Index a GenBank file to prepare it for use as a dictionary.

    Arguments:
    o genbank_file - The name of the GenBank file to be index.
    o index_name - The name of the index file which will be created.
    o rec_to_key - A function object which, when called with a GenBank
    record object, will return a key to be used for the record. If no
    function is specified, then the accession numbers will be used as
    the keys.
    s   %s does not exists   truncatei   s   parseri    s   Empty sequence key produceds   Duplicate key %s foundN(   s   oss   paths   existss   genbank_files
   ValueErrors   Indexs
   index_files   indexs
   Dictionarys   _Dictionary__filename_keys   Iterators   opens   RecordParsers   gb_iters   _readers	   positionss   starts   nexts   recs   lengths   Nones
   rec_to_keys   keys   lens	   accessions   KeyErrors   has_key(	   s   genbank_files
   index_files
   rec_to_keys   lengths   recs   keys   starts   indexs   gb_iter(    (    s-   /mit/seven/lib/python/Bio/GenBank/__init__.pys
   index_files.   
 
	s   NCBIDictionaryc      s   "d  Z  '(d d e d  Z =d   Z ?d   Z Ad   Z Cd   Z Ed   Z Gd	   Z Id
   Z	 Kd   Z
 Nd   Z Ve d  Z ]d   Z RS(   sN   Access GenBank using a read-only dictionary interface.

    Methods:
    
    s
   Nucleotidef5.0c    s   (23| |  _  4t |  |  _ 5| d j o 6d |  _ n- 7| d j o 8d |  _ n :t d  ;| |  _ d S(   s  NCBIDictionary([database][, delay][, parser])

        Create a new Dictionary to access GenBank.  database should be
        either 'Nucleotide' or 'Protein'.  delay is the number of
        seconds to wait between each query (5 default).  parser is an
        optional parser object to change the results into another
        form.  If unspecified, then the raw contents of the file will
        be returned.

        s
   Nucleotides   GenBanks   Proteins   GenPepts-   database should be 'Nucleotide' or 'Protein'.N(   s   parsers   selfs   RequestLimiters   delays   limiters   databases   formats
   ValueError(   s   selfs   databases   delays   parser(    (    s-   /mit/seven/lib/python/Bio/GenBank/__init__.pys   __init__(s   
c    s   =>t  d  d  S(   Ns    GenBank contains lots of entries(   s   NotImplementedError(   s   self(    (    s-   /mit/seven/lib/python/Bio/GenBank/__init__.pys   __len__=s   c    s   ?@t  d  d  S(   Ns   This is a read-only dictionary(   s   NotImplementedError(   s   self(    (    s-   /mit/seven/lib/python/Bio/GenBank/__init__.pys   clear?s   c    s   ABt  d  d  S(   Ns   This is a read-only dictionary(   s   NotImplementedError(   s   selfs   keys   item(    (    s-   /mit/seven/lib/python/Bio/GenBank/__init__.pys   __setitem__As   c    s   CDt  d  d  S(   Ns   This is a read-only dictionary(   s   NotImplementedError(   s   self(    (    s-   /mit/seven/lib/python/Bio/GenBank/__init__.pys   updateCs   c    s   EFt  d  d  S(   Ns   You don't need to do this...(   s   NotImplementedError(   s   self(    (    s-   /mit/seven/lib/python/Bio/GenBank/__init__.pys   copyEs   c    s   GHt  d  d  S(   Ns#   You don't really want to do this...(   s   NotImplementedError(   s   self(    (    s-   /mit/seven/lib/python/Bio/GenBank/__init__.pys   keysGs   c    s   IJt  d  d  S(   Ns#   You don't really want to do this...(   s   NotImplementedError(   s   self(    (    s-   /mit/seven/lib/python/Bio/GenBank/__init__.pys   itemsIs   c    s   KLt  d  d  S(   Ns#   You don't really want to do this...(   s   NotImplementedError(   s   self(    (    s-   /mit/seven/lib/python/Bio/GenBank/__init__.pys   valuesKs   c    sC   NOPy Q|  | Wn Rt j
 o Sd Sn XTd Sd S(   s   S.has_key(id) -> booli    i   N(   s   selfs   ids   KeyError(   s   selfs   id(    (    s-   /mit/seven/lib/python/Bio/GenBank/__init__.pys   has_keyNs   c    sB   VWy X|  | SWn Yt j
 o Z| Sn X[d  d  S(   Ns   How did I get here?(   s   selfs   ids   KeyErrors   failobj(   s   selfs   ids   failobj(    (    s-   /mit/seven/lib/python/Bio/GenBank/__init__.pys   getVs
   c    s_  ]cf|  i i   hy+ it i d |  i d |  i d | } Wn$ kt	 j
 o } ot |  n Xr| i   } s| i d  d j o tt |  n w| i d  d j o xt |  nY {| i d  d j o |t |  n0 }| i   i d  d j o ~t d	  n |  i t j	 o |  i i |  Sn | i   Sd
 S(   s   S.__getitem__(id) -> object

        Return the GenBank entry.  id is the GenBank ID (gi) of the
        entry.  Raises a KeyError if there's an error.
        
        s   Texts   dopts   uids   does not existi    s   Please try again later.s   intentionally withdrawns   htmls,   I unexpectedly got back html-formatted data.N(   s   selfs   limiters   waits   NCBIs   querys   databases   formats   ids   handles   IOErrors   xs   KeyErrors   peeklines   lines   finds   lowers   parsers   Nones   parses   read(   s   selfs   ids   lines   handles   x(    (    s-   /mit/seven/lib/python/Bio/GenBank/__init__.pys   __getitem__]s$   +(   s   __doc__s   Nones   __init__s   __len__s   clears   __setitem__s   updates   copys   keyss   itemss   valuess   has_keys   gets   __getitem__(    (    (    s-   /mit/seven/lib/python/Bio/GenBank/__init__.pys   NCBIDictionary"s   	s
   Nucleotidei  c  
  s   | d d g j o t d  n d t i f d     Y} |   } t i d | d |  d d	 d
 | } | i | i    | i Sd S(   s4  search_for(search[, database][, max_ids])

    Search GenBank and return a list of GenBank identifiers (gi's).
    search is the search string used to search the database.  database
    should be either 'Nucleotide' or 'Protein'.  max_ids is the maximum
    number of ids to retrieve (default 500).
    
    s
   Nucleotides   Proteins*   database must be 'Nucleotide' or 'Protein's   ResultParserc      s   d   Z  d   Z RS(   Nc    s&   t  i i |   g  |  _ d  S(   N(   s   sgmllibs
   SGMLParsers   __init__s   selfs   ids(   s   self(    (    s-   /mit/seven/lib/python/Bio/GenBank/__init__.pys   __init__s   c    se  t  } x6 | d r) \ } } | d j o | } n q W| o d  Sn t i |  \ } }
 } } } } | d d j o d  Sn | i d  } g  i } | d r } | | i d   q ~ } t  } t  }	 x6 | d r) \ } } | d j o | } n q
W| t  j	 o |  i i |  n d  S(   Ni    s   hrefi
   s
   query.fcgis   &amp;s   =s	   list_uids(   s   Nones   hrefs   attrss   names   values   urlparses   schemes   netlocs   paths   paramss   querys   frags   splits   appends   _[1]s   xs	   list_uidss   dbs   selfs   ids(   s   selfs   attrss   schemes   querys   paths   paramss   hrefs   _[1]s   values   dbs   netlocs	   list_uidss   names   xs   frag(    (    s-   /mit/seven/lib/python/Bio/GenBank/__init__.pys   start_as*   	 $ %		 (   s   __init__s   start_a(    (    (    s-   /mit/seven/lib/python/Bio/GenBank/__init__.pys   ResultParsers   s   Searchs   terms   doptcmdls   DocSums   dispmaxN(   s   databases
   ValueErrors   sgmllibs
   SGMLParsers   ResultParsers   parsers   NCBIs   querys   searchs   max_idss   handles   feeds   reads   ids(   s   searchs   databases   max_idss   ResultParsers   parsers   handle(    (    s-   /mit/seven/lib/python/Bio/GenBank/__init__.pys
   search_fors   $f127.0c    s   d t  i f d     Y} t |  }
 x |  o d i |  |   } |
 i
   t i d | d | d d d d	 d
 | }	 |	 i   } |   } | i |  x# | i d r } | |  q W|  | }  q7 Wd S(   sb  download_many(gis, callback_fn[, delay][, batchsize])

    Download many records from GenBank.  gis is a list of Genbank
    Gi's.  Each time a record is downloaded, callback_fn is called
    with the text of the record.  delay is the number of seconds to
    wait between requests.  Waits 127 seconds by default.  abatchsize
    is the number of records to request each time.  Default is 500
    records, which is the maximum NCBI can handle.

    This does not check to make sure all gi's are returned.  The
    client must make sure that the gi's are valid.  This may be
    implemented in the future.

    s   _RecordExtractorc      s5   d   Z  d   Z d   Z d   Z RS(   Nc    s2   t  i i |   g  |  _ d |  _ d  S(   Ni    (   s   sgmllibs
   SGMLParsers   __init__s   selfs   recordss
   _in_record(   s   self(    (    s-   /mit/seven/lib/python/Bio/GenBank/__init__.pys   __init__s   c    s   d |  _ g  |  _ d  S(   Ni   (   s   selfs
   _in_records   _current_record(   s   selfs
   attributes(    (    s-   /mit/seven/lib/python/Bio/GenBank/__init__.pys	   start_pres   c    s2   |  i i d i |  i   d |  _ d  S(   Ns    i    (   s   selfs   recordss   appends   joins   _current_records
   _in_record(   s   self(    (    s-   /mit/seven/lib/python/Bio/GenBank/__init__.pys   end_pres   c    s+   |  i o |  i i |  n d  S(   N(   s   selfs
   _in_records   _current_records   appends   data(   s   selfs   data(    (    s-   /mit/seven/lib/python/Bio/GenBank/__init__.pys   handle_datas   (   s   __init__s	   start_pres   end_pres   handle_data(    (    (    s-   /mit/seven/lib/python/Bio/GenBank/__init__.pys   _RecordExtractors   s   ,s   Retrieves	   list_uidss   dopts   GenBanks   txts   ons   dispmaxi    N(   s   sgmllibs
   SGMLParsers   _RecordExtractors   RequestLimiters   delays   limiters   giss   joins	   batchsizes   gi_strs   waits   NCBIs   querys   dbs   handles   reads   resultss	   extractors   feeds   recordss   recs   callback_fn(   s   giss   callback_fns	   broken_fns   dbs   delays	   batchsizes   parsers   _RecordExtractors   recs   handles   limiters   gi_strs   resultss	   extractor(    (    s-   /mit/seven/lib/python/Bio/GenBank/__init__.pys   download_manys     
 	(6   s   __doc__s   __all__s   strings   oss   res   sgmllibs   urlparses   xml.saxs   handlers   Martels   RecordReaders   Bio.SeqRecords	   SeqRecords   Bios   Alphabets   Bio.Alphabets   IUPACs   Bio.Seqs   Seqs   Files   Indexs   Bio.ParserSupports   AbstractConsumers   Bio.WWWs   NCBIs   RequestLimiters   EventGenerators   genbank_formats   Records   Bio.SeqFeatures	   References
   SeqFeatures   Bio.GenBanks   LocationParsers
   Dictionarys   Iterators	   Exceptions   ParserFailureErrors   LocationParserErrors   ErrorParsers   FeatureParsers   RecordParsers   _BaseGenBankConsumers   _FeatureConsumers   _RecordConsumers   _strip_and_combines   _Scanners   index_file_dbs   MindyDictionarys   Nones
   index_files   NCBIDictionarys
   search_fors   download_many(*   s   oss   EventGenerators   ParserFailureErrors   Files   RequestLimiters   RecordParsers   urlparses	   SeqRecords   Iterators
   Dictionarys   index_file_dbs   AbstractConsumers   MindyDictionarys   Seqs   download_manys   _strip_and_combines   Records   FeatureParsers   genbank_formats   IUPACs   strings   ErrorParsers   sgmllibs
   SeqFeatures   _RecordConsumers   _Scanners   _FeatureConsumers   handlers   res   __all__s   _BaseGenBankConsumers   LocationParserErrors   Martels
   index_files   RecordReaders   LocationParsers   NCBIs	   References
   search_fors   Alphabets   Indexs   NCBIDictionary(    (    s-   /mit/seven/lib/python/Bio/GenBank/__init__.pys   ?$ sV   B-+] ;OO.b6