*ė
<«;;c       sŃ     d  Z    d k Z ! y " d k TWn$ # e j
 o Z $ e d  n X& d k l Z ( d f  d     YZ 9 d   Z ? d   Z	 K d	   Z
 P e d
  Z a e
 e d  Z ~ e
 e d  Z d S(   s  kNN.py

This module provides code for doing k-nearest-neighbors classification.

k Nearest Neighbors is a supervised learning algorithm that classifies
a new observation based the classes in its surrounding neighborhood.

Glossary:
distance   The distance between two points in the feature space.
weight     The importance given to each point for classification. 


Classes:
kNN           Holds information for a nearest neighbors classifier.


Functions:
train        Train a new kNN classifier.
calculate    Calculate the probabilities of each class, given an observation.
classify     Classify an observation into a class.

    Distance Functions:
euclidean_dist  The euclidean distance between two points.

    Weighting Functions:
equal_weight    Every example is given a weight of 1.

N(   s   *s   This module requires NumPy(   s   listfnss   kNNc      s   ( d  Z  1 2 d   Z RS(   s  Holds information necessary to do nearest neighbors classification.

    Members:
    classes  List of the possible classes.
    xs       List of the neighbors.
    ys       List of the classes that the neighbors belong to.
    k        Number of neighbors to look at.

    c    s:   2 3 4 g  |  _ 5 g  |  _ 6 g  |  _ 7 t |  _ d S(   s   kNN()N(   s   selfs   classess   xss   yss   Nones   k(   s   self(    (    sF   /mit/seven/lib/python2.1/site-packages/Bio/Tools/Classification/kNN.pys   __init__2 s
   (   s   __doc__s   __init__(    (    (    sF   /mit/seven/lib/python2.1/site-packages/Bio/Tools/Classification/kNN.pys   kNN( s   		c    sT   9 : ; t  |   t  |  j o < t d  n = t i t |  | d   Sd S(   s:   euclidean_dist(x, y) -> euclidean distance between x and ys   vectors must be same lengthi   N(   s   lens   xs   ys
   ValueErrors   maths   sqrts   sum(   s   xs   y(    (    sF   /mit/seven/lib/python2.1/site-packages/Bio/Tools/Classification/kNN.pys   euclidean_dist9 s   c    s   ? @ D t  |   t  |  j o E t d  n F d } G x< t t  |    d G r# } H | |  | | | d 7} qS WI t i |  Sd S(   s=   euclidean_dist_py(x, y) -> euclidean distance between x and ys   vectors must be same lengthi    i   N(	   s   lens   xs   ys
   ValueErrors   sums   ranges   is   maths   sqrt(   s   xs   ys   sums   i(    (    sF   /mit/seven/lib/python2.1/site-packages/Bio/Tools/Classification/kNN.pys   euclidean_dist_py? s   	 	!c    s   K L N d Sd S(   s   equal_weight(x, y) -> 1i   N(    (   s   xs   y(    (    sF   /mit/seven/lib/python2.1/site-packages/Bio/Tools/Classification/kNN.pys   equal_weightK s   c    s_   P Y Z t    } [ t i |  | _ \ t |  |  | _ ] | | _ ^ | | _	 _ | Sd S(   sN  train(xs, ys, k) -> kNN
    
    Train a k nearest neighbors classifier on a training set.  xs is a
    list of observations and ys is a list of the class assignments.
    Thus, xs and ys should contain the same number of elements.  k is
    the number of neighbors that should be examined when doing the
    classification.
    
    N(
   s   kNNs   knns   listfnss   itemss   yss   classess   asarrays   xss   typecodes   k(   s   xss   yss   ks   typecodes   knn(    (    sF   /mit/seven/lib/python2.1/site-packages/Bio/Tools/Classification/kNN.pys   trainP s   	c 
   s  a k l t  |  } n g  } o xQ t t |  i   d o r5 } p | | |  i |  }	 q | i
 |	 | f  q9 Wr | i   u h  } v x# |  i d v r } w d | | <q Wx xU | |  i  d x rA \ }	 } y |  i | } z | | | | |  i |  | | <qĒ W| | Sd S(   s½  calculate(knn, x[, weight_fn][, distance_fn]) -> weight dict

    Calculate the probability for each class.  knn is a kNN object.  x
    is the observed data.  weight_fn is an optional function that
    takes x and a training example, and returns a weight.  distance_fn
    is an optional function that takes two points and returns the
    distance between them.  Returns a dictionary of the class to the
    weight given to the class.
    
    i    f0.0N(   s   asarrays   xs   orders   ranges   lens   knns   xss   is   distance_fns   dists   appends   sorts   weightss   classess   ks   yss   klasss	   weight_fn(
   s   knns   xs	   weight_fns   distance_fns   orders   klasss   weightss   is   ks   dist(    (    sF   /mit/seven/lib/python2.1/site-packages/Bio/Tools/Classification/kNN.pys	   calculatea s"   
	 		 	 )c 	   s   ~   t  |  | d | d | }  t }  t }  xR | i	   d  r? \ } }  | t j p
 | | j o  | }  | } n qH W | Sd S(   są   classify(knn, x[, weight_fn][, distance_fn]) -> class

    Classify an observation into a class.  If not specified, weight_fn will
    give all neighbors equal weight and distance_fn will be the euclidean
    distance.

    s	   weight_fns   distance_fni    N(   s	   calculates   knns   xs	   weight_fns   distance_fns   weightss   Nones
   most_classs   most_weights   itemss   klasss   weight(	   s   knns   xs	   weight_fns   distance_fns   klasss   weightss
   most_classs   most_weights   weight(    (    sF   /mit/seven/lib/python2.1/site-packages/Bio/Tools/Classification/kNN.pys   classify~ s   		 	(   s   __doc__s   maths   Numerics   ImportErrors   xs	   Bio.Toolss   listfnss   kNNs   euclidean_dists   euclidean_dist_pys   equal_weights   Nones   trains	   calculates   classify(
   s   maths   euclidean_dists   xs   classifys   trains   equal_weights   euclidean_dist_pys   listfnss	   calculates   kNN(    (    sF   /mit/seven/lib/python2.1/site-packages/Bio/Tools/Classification/kNN.pys   ? s   