/* ===========================================================================
*
*                            PUBLIC DOMAIN NOTICE
*            National Center for Biotechnology Information (NCBI)
*
*  This software/database is a "United States Government Work" under the
*  terms of the United States Copyright Act.  It was written as part of
*  the author's official duties as a United States Government employee and
*  thus cannot be copyrighted.  This software/database is freely available
*  to the public for use. The National Library of Medicine and the U.S.
*  Government do not place any restriction on its use or reproduction.
*  We would, however, appreciate having the NCBI and the author cited in
*  any work or product based on this material.
*
*  Although all reasonable efforts have been taken to ensure the accuracy
*  and reliability of the software and data, the NLM and the U.S.
*  Government do not and cannot warrant the performance or results that
*  may be obtained by using this software or data. The NLM and the U.S.
*  Government disclaim all warranties, express or implied, including
*  warranties of performance, merchantability or fitness for any particular
*  purpose.
*
* ===========================================================================
*
* File Name:  alignmgr2.h
*
* Author:  Sarah Wheelan
*
* Version Creation Date:  10/01 
*
* $Revision: 6.10 $
*
* File Description: SeqAlign indexing, access, and manipulation functions
*
* Modifications:
* --------------------------------------------------------------------------
* $Log: alignmgr2.h,v $
* Revision 6.10  2001/12/14 12:38:35  wheelan
* added functions for ddv
*
* Revision 6.9  2001/11/30 16:55:07  wheelan
* added AlnMgr2PadConservatively
*
* Revision 6.8  2001/11/29 17:37:02  wheelan
* added ExtendToCoords and MergeTwoAlignments
*
* Revision 6.7  2001/11/13 14:35:33  wheelan
* added new field to AMSmall structure
*
* Revision 6.6  2001/11/08 19:55:32  wheelan
* added AlnMgr2GetNthRowSpanInSA
*
* Revision 6.5  2001/10/23 12:13:24  wheelan
* added #define AM_HARDSTOP
*
* Revision 6.4  2001/10/16 12:00:00  wheelan
* added GetParent and FreeEitherIndex
*
* Revision 6.3  2001/10/08 18:43:25  wheelan
* added comments
*
* Revision 6.2  2001/10/03 18:12:51  wheelan
* changed some colliding defines
*
* Revision 6.1  2001/10/03 14:20:30  wheelan
* initial checkin
*
* ==========================================================================
*/

#ifndef _ALIGNMGR2_
#define _ALIGNMGR2_

#include <ncbi.h>
#include <sqnutils.h>
#include <salutil.h>
#include <salpedit.h>
#include <samutil.h>

#undef NLM_EXTERN
#ifdef NLM_IMPORT
#define NLM_EXTERN NLM_IMPORT
#else
#define NLM_EXTERN extern
#endif

#ifdef __cplusplus
extern "C" {
#endif

/* defines for frequency matrix sizes */
/* NOTE(review): presumably the residue-alphabet sizes for nucleotide and
   protein frequency matrices (see AMFreq) -- confirm in the implementation */
#define AM_NUCSIZE   6
#define AM_PROTSIZE  26

/* defines for AlnMgr2ComputeScoreForPairwiseSeqAlign */
/* NOTE(review): this header declares AlnMgr2ComputeScoreForSeqAlign but no
   ...PairwiseSeqAlign function -- confirm which scoring routine reads these.
   Both values are negative; presumably they are added to a score as gap
   open / gap extension penalties -- TODO confirm. */
#define AM_GAPOPEN  -11
#define AM_GAPEXT   -1

/* defines for saip->indextype */
/* Both SAIndex2 and AMAlignIndex2 start with a Uint1 indextype field. */
#define INDEX_CHILD   1
#define INDEX_PARENT  2

/* defines for amp->type */
/* Stored in AlnMsg2.type: the returned piece is sequence (AM_SEQ) or a
   gap (AM_GAP). */
#define AM_SEQ        1
#define AM_GAP        2

/* tail selectors: passed as which_tail to AlnMgr2GetNthRowTail */
#define AM2_LEFT_TAIL   0
#define AM2_RIGHT_TAIL  1

/* defines for AlnMgr2AddInNewSA */
/* NOTE(review): AlnMgr2AddInNewSA is not declared in this header; it is
   presumably internal to the implementation file -- confirm. */
#define AM_START    -1
#define AM_STOP      1
#define AM_HARDSTOP  3

/* defines for amaip->alnstyle */
/* Values for AMAlignIndex2.alnstyle. */
#define AM_CONTIG_LINEAR  1
#define AM_INTER_LINEARF  2
#define AM_INTER_LINEAR   3
#define AM_CONTIG_NONLIN  4
#define AM_INTER_NONLINF  5
#define AM_INTER_NONLIN   6
#define AM2_LITE          7
#define AM2_FULLINDEX     8

/* defines for AMEdge.used */
#define AM_NOTUSED   0
#define AM_USED      1
#define AM_CONFLICT  2
#define AM_USED2     3

/* SARowDat2: per-row index data, referenced through SAIndex2.srdp.
   Holds four Uint2 arrays (sect, unsect, insect, unaligned), each with a
   Uint2 counter (numsect, numunsect, numinsect, numunaln).
   NOTE(review): each counter is presumably the element count of the
   matching array, and the entries presumably index segments of the owning
   alignment -- confirm in the implementation. */
typedef struct am_sarowdat {
   Uint2Ptr  sect;
   Uint2Ptr  unsect;
   Uint2Ptr  insect;
   Uint2Ptr  unaligned;
   Uint2     numsect;
   Uint2     numunsect;
   Uint2     numinsect;
   Uint2     numunaln;
} SARowDat2, PNTR SARowDat2Ptr;

/* SAIndex2: index structure for a seqalign. It begins with the same two
   fields (indextype, freefunc) as AMAlignIndex2.
   NOTE(review): this is presumably the index attached to child
   (INDEX_CHILD) seqaligns, with AMAlignIndex2 used for parents -- confirm
   in the implementation. */
typedef struct am_saindex {
   Uint1                  indextype;   /* INDEX_CHILD or INDEX_PARENT */
   SeqAlignIndexFreeFunc  freefunc;    /* callback used to free this index */
   Uint4Ptr               aligncoords;
   Int4                   anchor;      /* see AlnMgr2FindAnchor / AlnMgr2AnchorSeqAlign */
   SARowDat2Ptr           PNTR srdp;   /* presumably one entry per row -- TODO confirm */
   Int4                   numrows;
   Int4                   numseg;
   Int4                   numunaln;
   Uint4Ptr               unaln;
   Int4                   numinchain;
   Int4                   numsplitaln;
   Int4                   score;
   Boolean                aligned;
   SeqAlignPtr            top;         /* presumably the top-level seqalign (cf. AlnMgr2GetParent) -- TODO confirm */
} SAIndex2, PNTR SAIndex2Ptr;

/* SAIndex2Free2: callback taking an untyped index pointer and returning a
   Boolean.
   NOTE(review): presumably frees an SAIndex2 and is the function stored in
   SAIndex2.freefunc -- confirm in the implementation. */
NLM_EXTERN Boolean LIBCALLBACK SAIndex2Free2(VoidPtr index);

/* AMUnalign: description of an unaligned region, referenced from
   AMParcel.unaligned. Has the same layout as AMInsert.
   NOTE(review): starts/lens are presumably parallel arrays with one entry
   per row -- confirm in the implementation. */
typedef struct am_unalign {
   Int4Ptr  starts;
   Int4Ptr  lens;
   Int4     which_side;
} AMUnalign, PNTR AMUnalignPtr;

/* AMInsert: description of an insert, referenced from AMParcel.inserts.
   Has the same layout as AMUnalign.
   NOTE(review): starts/lens are presumably parallel arrays with one entry
   per row -- confirm in the implementation. */
typedef struct am_insert {
   Int4Ptr  starts;
   Int4Ptr  lens;
   Int4     which_side;
} AMInsert, PNTR AMInsertPtr;

/* AMParcel: bundles an alignment start with pointers to its unaligned
   region (AMUnalign) and insert (AMInsert) descriptions.
   NOTE(review): sap_source presumably identifies the seqalign the parcel
   came from -- confirm in the implementation. */
typedef struct am_parcel {
   Int4          alnstart;
   Int4          sap_source;
   AMUnalignPtr  unaligned;
   AMInsertPtr   inserts;
} AMParcel, PNTR AMParcelPtr;
   
/* AMAlignIndex2: index structure that collects a set of seqaligns in the
   saps array (AlnMgr2IndexLite allocates one and fills in saps). It begins
   with the same two fields (indextype, freefunc) as SAIndex2. */
typedef struct am_alignindex {
   Uint1                  indextype;    /* INDEX_CHILD or INDEX_PARENT */
   SeqAlignIndexFreeFunc  freefunc;     /* callback used to free this index */
   Uint2                  alnstyle;     /* one of the AM_CONTIG_..., AM_INTER_..., AM2_... alnstyle defines */
   Int4                   anchor;
   Int4                   numrows;
   SeqIdPtr               PNTR ids;  /* one SeqId per row */
   Int4Ptr                aligncoords;
   Int4                   numsaps;      /* presumably the length of saps and aligned -- TODO confirm */
   SeqAlignPtr            PNTR saps;
   Boolean                PNTR aligned; /* for each sap -- is it used in the overall alignment? */
   SeqAlignPtr            sharedaln;
} AMAlignIndex2, PNTR AMAlignIndex2Ptr;

/* AMAlignIndex2Free2: callback taking an untyped index pointer and
   returning a Boolean.
   NOTE(review): presumably frees an AMAlignIndex2 and is the function
   stored in AMAlignIndex2.freefunc -- confirm in the implementation. */
NLM_EXTERN Boolean LIBCALLBACK AMAlignIndex2Free2(VoidPtr index);

/* AMAdjac: a from/to coordinate pair. AlnMsg2 uses it to report the
   inserts and unaligned regions to the left and right of a returned piece.
   NOTE(review): coordinate system and inclusiveness are not stated here --
   confirm in the implementation. */
typedef struct am_adjacent {
   Int4  from;
   Int4  to;
} AMAdjac, PNTR AMAdjacPtr;

/* AlnMsg2: request/response message for AlnMgr2GetNextAlnBit. The caller
   fills in the range (alignment coordinates) and the row; each call to
   AlnMgr2GetNextAlnBit fills in the description of one piece. */
typedef struct am_msg2 {
/* fields filled in by calling function */
   Int4        from_aln; /* from is in alignment coordinates */
   Int4        to_aln;   /* to is in alignment coordinates */
   Int4        row_num; /* which row the function wants to retrieve */
/* fields filled in by AlnMgr2GetNextAlnBit */
   /* from_row/to_row are sequence coordinates when type is AM_SEQ and
      alignment coordinates when type is AM_GAP */
   Int4        from_row;
   Int4        to_row;
   Uint1       strand;
   Uint1       type; /* AM_SEQ or AM_GAP */
   AMAdjacPtr  left_insert;
   AMAdjacPtr  right_insert;
   AMAdjacPtr  left_unaligned;
   AMAdjacPtr  right_unaligned;
/* fields used internally */
   Int4        len;
   Int4        real_from;
} AlnMsg2, PNTR AlnMsg2Ptr;

/* Lifecycle functions for AlnMsg2.
   NOTE(review): behavior below is inferred from the names and signatures
   only -- confirm in the implementation.
     AlnMsgNew2:   presumably allocates and returns a new AlnMsg2.
     AlnMsgFree2:  presumably frees amp; the returned pointer is presumably
                   NULL so callers can write amp = AlnMsgFree2(amp).
     AlnMsgReNew2: presumably resets amp for reuse without reallocating. */
NLM_EXTERN AlnMsg2Ptr AlnMsgNew2(void);
NLM_EXTERN AlnMsg2Ptr AlnMsgFree2(AlnMsg2Ptr amp);
NLM_EXTERN void AlnMsgReNew2(AlnMsg2Ptr amp);

/* AM_Small2: list node carrying four Int4 values (n1..n4); next points to
   another node of the same type.
   NOTE(review): the meaning of n1..n4 depends on the caller and is not
   stated in this header -- confirm in the implementation. */
typedef struct am_small {
   Int4  n1;
   Int4  n2;
   Int4  n3;
   Int4  n4;
   struct am_small PNTR next;
} AM_Small2, PNTR AM_Small2Ptr;

/* AM_Core: record tying a start/length pair to a seqalign number, a row
   and a SeqId, plus an alignment start and left/right values.
   NOTE(review): exact semantics of start_core, start_aln, left and right
   are not stated in this header -- confirm in the implementation. */
typedef struct am_coreinf {
   Int4      start_core;
   Int4      len;
   Int4      sap_num;
   Int4      row;
   SeqIdPtr  sip;
   Int4      start_aln;
   Int4      left;
   Int4      right;
} AM_Core, PNTR AM_CorePtr;

/* AMRowInfo: list node holding a start position (from) and a length; next
   points to another node of the same type.
   NOTE(review): coordinate system of from is not stated here -- confirm. */
typedef struct am_rowinf {
   Int4  from;
   Int4  len;
   struct am_rowinf PNTR next;
} AMRowInfo, PNTR AMRowInfoPtr; 

/* AMCdRow: identifies a row by SeqId, strand and row number.
   NOTE(review): the struct tag (am_condenserow) suggests it is used when
   condensing rows -- confirm in the implementation. */
typedef struct am_condenserow {
   SeqIdPtr  sip;
   Uint1     strand;
   Int4      rownum;
} AMCdRow, PNTR AMCdRowPtr;

/* AMInterval: list node describing a from/to interval with a strand; next
   points to another node of the same type. AMIntervalSet.int_head points
   at a chain of these. */
typedef struct am_interval {
   Int4   from;
   Int4   to;
   Uint1  strand;
   struct am_interval PNTR next;
} AMInterval, PNTR AMIntervalPtr;

/* AMIntervalSet: list node associating a SeqId and strand with a chain of
   AMInterval nodes (int_head); next points to another set. */
typedef struct am_intervalset {
   SeqIdPtr       sip;
   AMIntervalPtr  int_head;
   Uint1          strand;
   struct am_intervalset PNTR next;
} AMIntervalSet, PNTR AMIntervalSetPtr;

/* AMEdge: list node for a graph edge between two vertices (vertex1,
   vertex2) with a weight and the seqalign it carries; next points to
   another edge.
   NOTE(review): vertex1/vertex2 are presumably indices into the vertex
   set -- confirm in the implementation. */
typedef struct am_edge {
   Int4         vertex1;
   Int4         vertex2;
   Int4         weight;
   Int4         used;     /* AM_NOTUSED, AM_USED, AM_CONFLICT or AM_USED2 */
   Boolean      aligned;
   SeqAlignPtr  sap;
   struct am_edge PNTR next;
} AMEdge, PNTR AMEdgePtr;

/* AMVertex: list node for a graph vertex, described by a SeqId, a from/to
   range and a strand, with an edge count and two Boolean flags (used,
   visited); next points to another vertex.
   NOTE(review): the flags are presumably bookkeeping for a graph
   traversal -- confirm in the implementation. */
typedef struct am_vertex {
   SeqIdPtr  sip;
   Int4      from;
   Int4      to;
   Uint1     strand;
   Int4      numedges;
   Boolean   used;
   Boolean   visited;
   struct am_vertex PNTR next;
} AMVertex, PNTR AMVertexPtr;

/* AMQueue: list node holding a pointer to an AMVertex; next points to
   another queue node. */
typedef struct am_queue {
   AMVertexPtr  vertex;
   struct am_queue PNTR next;
} AMQueue, PNTR AMQueuePtr;

/* AMSegment: list node describing a segment by its length, a row number,
   the seqalign it belongs to and an alignment coordinate; next points to
   another segment. AMRowStart.segment points at a chain of these. */
typedef struct am_segment {
   Int4         len;
   Int4         which_row;
   SeqAlignPtr  sap;
   Int4         aligncoord;
   struct am_segment PNTR next;
} AMSegment, PNTR AMSegmentPtr;

/* AMRowStart: list node pairing a SeqId with a pointer to an AMSegment;
   next points to another AMRowStart. */
typedef struct am_rowstart {
   AMSegmentPtr  segment;
   SeqIdPtr      sip;
   struct am_rowstart PNTR next;
} AMRowStart, PNTR AMRowStartPtr;

/* AMFreq: two-dimensional frequency matrix returned by
   AlnMgr2ComputeFreqMatrix and released with AMFreqFree. freq is indexed
   with size as the first dimension and len as the second.
   NOTE(review): size is presumably AM_NUCSIZE or AM_PROTSIZE and isna
   presumably marks a nucleic-acid matrix -- confirm in the
   implementation. */
typedef struct am_frequency {
   Int4Ptr  PNTR freq;
   Int4     len;  /* second dimension */
   Int4     size; /* first dimension */
   Boolean  isna;
} AMFreq, PNTR AMFreqPtr;

/* AMBitty2: list node carrying four Int4 values (n, num1..num3); next
   points to another node of the same type.
   NOTE(review): the meaning of the values depends on the caller and is
   not stated in this header -- confirm in the implementation. */
typedef struct am_bit {
   Int4  n;
   Int4  num1;
   Int4  num2;
   Int4  num3;
   struct am_bit PNTR next;
} AMBitty2, PNTR AMBitty2Ptr;

/***************************************************************************
*
*  SECTION 1: Functions for allocating and freeing data structures used
*  by the alignment manager.
*
***************************************************************************/
/* AMFreqFree takes an AMFreq pointer.
   NOTE(review): presumably frees the matrix and the structure itself --
   confirm in the implementation. */
NLM_EXTERN void AMFreqFree(AMFreqPtr afp);
/* AMAlignIndexFreeEitherIndex takes a seqalign.
   NOTE(review): presumably frees whichever index type (SAIndex2 or
   AMAlignIndex2) is attached to sap -- confirm in the implementation. */
NLM_EXTERN void AMAlignIndexFreeEitherIndex(SeqAlignPtr sap);

/***************************************************************************
*
*  SECTION 2: Functions used to create the indexes for parent and child
*  seqaligns.
*    SECTION 2a: Functions to create indexes for child seqaligns, and
*                to convert seqaligns to dense-seg type
*    SECTION 2b: Functions to unpack and rearrange complicated seqaligns
*                into simple chains of dense-seg and dense-diag types
*    SECTION 2c: Functions to create indexes for parent seqaligns
*    SECTION 2d: Accessory functions for parent indexing
*
***************************************************************************/
/* SECTION 2a */
/***************************************************************************
*
*  AlnMgr2IndexSingleChildSeqAlign takes a simple dense-seg or dense-diag
*  seqalign, converts it to dense-seg, and then calls
*  AlnMgr2IndexSingleDenseSegSA to create the indexes. If the alignment has
*  already been indexed, the existing index is erased and the alignment is
*  reindexed.
*
*  sap:     the seqalign to convert and index (modified in place).
*  returns: Boolean status. NOTE(review): presumably TRUE on success --
*           confirm in the implementation.
*
***************************************************************************/
NLM_EXTERN Boolean AlnMgr2IndexSingleChildSeqAlign(SeqAlignPtr sap);

/* SECTION 2c */
/***************************************************************************
*
*  AlnMgr2IndexLite takes a seqalign or a list of seqaligns, converts
*  each alignment to a dense-seg structure and indexes it, and then
*  allocates an AMAlignIndex2 structure and fills in the saps array.
*
*  sap:     a single seqalign or the head of a list of seqaligns.
*  returns: Boolean status. NOTE(review): presumably TRUE on success --
*           confirm in the implementation.
*
***************************************************************************/
NLM_EXTERN Boolean AlnMgr2IndexLite(SeqAlignPtr sap);

/***************************************************************************
*
*  AlnMgr2IndexSeqAlign takes a seqalign of any type except std-seg and
*  creates indexes on it so that other AlnMgr2 functions can retrieve
*  information from it easily.
*
*  - If the seqalign is a single alignment, that alignment gets a simple
*    index and is otherwise left alone.
*  - If the seqalign is a set of alignments or a dense-diag set, the
*    subalignments are indexed individually and then combined into a
*    (fake) multiple alignment, which is also indexed. AlnMgr2 functions
*    can then access the subalignments as one multiple alignment.
*
*  The function returns nothing, so failures are not reported through the
*  return value.
*
***************************************************************************/
NLM_EXTERN void AlnMgr2IndexSeqAlign(SeqAlignPtr sap);

/***************************************************************************
*
*  AlnMgr2ReIndexSeqAlign takes an indexed alignment (one that has,
*  presumably, been changed since it was indexed), makes sure that every
*  child seqalign is indexed (children that already have an index are not
*  reindexed), and then reindexes the child seqaligns as a set.
*
***************************************************************************/
NLM_EXTERN void AlnMgr2ReIndexSeqAlign(SeqAlignPtr sap);

/***************************************************************************
*
*  AlnMgr2IndexIndexedChain takes a linked list of seqaligns that are
*  already indexed and transforms it, in place, into an indexed
*  parent-child seqalign set.
*
*  sap: head of the linked list of indexed seqaligns (modified in place).
*
***************************************************************************/
NLM_EXTERN void AlnMgr2IndexIndexedChain(SeqAlignPtr sap);

/***************************************************************************
*
*  SECTION 3: Functions for debugging
*
***************************************************************************/
/* Debugging output functions; both write to the caller-supplied FILE.
   NOTE(review): details below are inferred from names and signatures --
   confirm in the implementation.
     am_print_sa_index:    presumably dumps the index attached to sap.
     AlnMgr2PrintSeqAlign: presumably prints the alignment itself, linesize
                           columns per line; isnuc presumably selects
                           nucleotide (TRUE) versus protein output. */
NLM_EXTERN void am_print_sa_index(SeqAlignPtr sap, FILE *ofp);
NLM_EXTERN void AlnMgr2PrintSeqAlign(SeqAlignPtr sap, Int4 linesize, Boolean isnuc, FILE * ofp);

/***************************************************************************
*
*  SECTION 4: API-level functions (and their helper functions) used to
*  access an indexed alignment.
*    SECTION 4a: AlnMgr2GetNextAlnBit and associated functions
*    SECTION 4b: "GetNth" functions
*    SECTION 4c: other functions for accessing the alignment
*
***************************************************************************/
/* SECTION 4a */
/***************************************************************************
*
*  AlnMgr2GetNextAlnBit takes an indexed seqalign and returns it, piece
*  by piece, for the row and range specified in the AlnMsg2 structure.
*
*  Input:  amp->from_aln and amp->to_aln must be filled in (alignment
*          coordinates), together with amp->row_num.
*  Output: amp->from_row and amp->to_row are filled in. If amp->type is
*          AM_SEQ these are sequence coordinates; if amp->type is AM_GAP
*          they are alignment coordinates and the row is gapped there.
*
*  Each call returns one continuous piece of sequence or gap. The function
*  keeps returning TRUE until it has returned all the information for the
*  requested piece of the alignment, so it is meant to be called in a
*  loop with the same amp.
*
***************************************************************************/
NLM_EXTERN Boolean AlnMgr2GetNextAlnBit(SeqAlignPtr sap, AlnMsg2Ptr amp);

/* SECTION 4b */
/***************************************************************************
*
*  AlnMgr2GetNthStrand takes an indexed seqalign and a row number and
*  returns the strand of the row indicated. A return of 0 indicates
*  an error.
*
*  n: row number. NOTE(review): other GetNth functions in this header
*     document rows as 1-based; presumably the same holds here -- confirm.
*
***************************************************************************/
NLM_EXTERN Uint1 AlnMgr2GetNthStrand(SeqAlignPtr sap, Int4 n);

/***************************************************************************
*
*  AlnMgr2GetNthSeqIdPtr returns the seqid of the nth row (1-based) of an
*  indexed parent or child seqalign.
*
*  Ownership: the returned seqid is a duplicated, newly allocated copy;
*  the caller must free it.
*
***************************************************************************/
NLM_EXTERN SeqIdPtr AlnMgr2GetNthSeqIdPtr(SeqAlignPtr sap, Int4 n);

/***************************************************************************
*
*  AlnMgr2GetNthSeqRangeInSA returns the smallest and largest sequence
*  coordinates contained in the nth row of an indexed seqalign.
*
*  start, stop: output pointers. Either one may be NULL when only the
*               other coordinate is wanted.
*
***************************************************************************/
NLM_EXTERN void AlnMgr2GetNthSeqRangeInSA(SeqAlignPtr sap, Int4 n, Int4Ptr start, Int4Ptr stop);

/***************************************************************************
*
*  AlnMgr2GetNthRowSpanInSA returns the least and greatest alignment
*  coordinates (inclusive) spanned by the indicated row.
*
*  start, stop: output pointers. Either one may be NULL when only the
*               other coordinate is wanted.
*
***************************************************************************/
NLM_EXTERN void AlnMgr2GetNthRowSpanInSA(SeqAlignPtr sap, Int4 n, Int4Ptr start, Int4Ptr stop);

/* AlnMgr2GetMaxTailLength takes a seqalign and a tail selector and returns
   an Int4.
   NOTE(review): which_tail is presumably AM2_LEFT_TAIL or AM2_RIGHT_TAIL
   (as for AlnMgr2GetNthRowTail) and the result presumably the longest tail
   over all rows on that side -- confirm in the implementation. */
NLM_EXTERN Int4 AlnMgr2GetMaxTailLength(SeqAlignPtr sap, Uint1 which_tail);

/***************************************************************************
*
*  AlnMgr2GetNthRowTail returns the sequence extremities that are not
*  contained in the alignment. For example, if the alignment starts at 10
*  in row 2, the tail in that row is 0-9.
*
*  sap:        an indexed seqalign.
*  n:          1-based row number.
*  which_tail: AM2_LEFT_TAIL or AM2_RIGHT_TAIL.
*  start, stop, strand: outputs; the start, stop and strand of the
*              requested tail in the requested row.
*  returns:    TRUE if the calculations were completed successfully.
*
***************************************************************************/
NLM_EXTERN Boolean AlnMgr2GetNthRowTail(SeqAlignPtr sap, Int4 n, Uint1 which_tail, Int4Ptr start, Int4Ptr stop, Uint1Ptr strand);

/* SECTION 4c */
/***************************************************************************
*
*  AlnMgr2GetAlnLength returns the total alignment length of an indexed
*  alignment.
*
*  fill_in: if TRUE, the total length of all the internal unaligned
*           regions is computed and added to the alignment length; if
*           FALSE, only the aligned portions are counted.
*
***************************************************************************/
NLM_EXTERN Int4 AlnMgr2GetAlnLength(SeqAlignPtr sap, Boolean fill_in);

/* SECTION 4c functions for DDV */
/* NOTE(review): these three functions have no description in this header;
   the notes below are inferred from names and signatures only -- confirm
   in the implementation.
     AlnMgr2IsSAPDiscAli:     presumably TRUE when sap is a discontinuous
                              alignment.
     AlnMgr2GetNumAlnBlocks:  presumably the number of aligned blocks.
     AlnMgr2GetNthBlockRange: presumably writes the range of the nth block
                              to start/stop and returns TRUE on success. */
NLM_EXTERN Boolean AlnMgr2IsSAPDiscAli(SeqAlignPtr sap);
NLM_EXTERN Int4 AlnMgr2GetNumAlnBlocks(SeqAlignPtr sap);
NLM_EXTERN Boolean AlnMgr2GetNthBlockRange(SeqAlignPtr sap, Int4 n, Int4Ptr start, Int4Ptr stop);

/***************************************************************************
*
*  AlnMgr2GetNthUnalignedForNthRow returns, through start and stop, the
*  bioseq coordinates of the requested row within the requested unaligned
*  region. On any error both start and stop are set to -1.
*
*  unaligned: which unaligned region is wanted.
*  row:       which row is wanted.
*  returns:   Boolean. NOTE(review): presumably FALSE on error -- confirm
*             in the implementation.
*
***************************************************************************/
NLM_EXTERN Boolean AlnMgr2GetNthUnalignedForNthRow(SeqAlignPtr sap, Int4 unaligned, Int4 row, Int4Ptr start, Int4Ptr stop);

/***************************************************************************
*
*  AlnMgr2GetNextLengthBit is called in a loop on an indexed alignment to
*  return the lengths of the aligned and unaligned regions in turn.
*
*  len: output; a negative length marks an unaligned region, any other
*       value an aligned region.
*  seg: loop state; the value pointed to must be 0 on the first call.
*  returns: Boolean. NOTE(review): presumably TRUE while a length was
*       returned and FALSE when the loop should stop -- confirm in the
*       implementation.
*
***************************************************************************/
NLM_EXTERN Boolean AlnMgr2GetNextLengthBit(SeqAlignPtr sap, Int4Ptr len, Int4Ptr seg);

/***************************************************************************
*
*  AlnMgr2GetNumRows returns the number of rows in an indexed seqalign.
*
*  NOTE(review): the value returned on error (for example an unindexed
*  seqalign) is not stated here -- confirm in the implementation.
*
***************************************************************************/
NLM_EXTERN Int4 AlnMgr2GetNumRows(SeqAlignPtr sap);

/***************************************************************************
*
*  AlnMgr2GetNumSegs returns the number of segments in the alignment,
*  where a segment is a contiguous run of gap or of aligned sequence. The
*  alignment itself may be continuous or not.
*
***************************************************************************/
NLM_EXTERN Int4 AlnMgr2GetNumSegs(SeqAlignPtr sap);

/***************************************************************************
*
*  AlnMgr2GetNumSegsInRange returns the number of alignment segments
*  spanned, partially or fully, by the given range.
*
*  start, stop: the range, in alignment coordinates.
*  start_seg:   output pointer. NOTE(review): presumably receives the
*               number of the first segment in the range -- confirm in
*               the implementation.
*
***************************************************************************/
NLM_EXTERN Int4 AlnMgr2GetNumSegsInRange(SeqAlignPtr sap, Int4 start, Int4 stop, Int4Ptr start_seg);

/***************************************************************************
*
*  AlnMgr2GetNthSegmentRange returns the alignment coordinate range of the
*  Nth segment of the seqalign.
*
*  n:           segment number; counting starts at 1.
*  start, stop: optional output pointers, for callers that want only one
*               end of the range.
*
***************************************************************************/
NLM_EXTERN void AlnMgr2GetNthSegmentRange(SeqAlignPtr sap, Int4 n, Int4Ptr start, Int4Ptr stop);

/***************************************************************************
*
*  AlnMgr2GetFirstNForSip returns the first row on which a seqid occurs.
*  It returns -1 if the seqid is not in the alignment or if any other
*  error occurs.
*
***************************************************************************/
NLM_EXTERN Int4 AlnMgr2GetFirstNForSip(SeqAlignPtr sap, SeqIdPtr sip);

/***************************************************************************
*
*  AlnMgr2GetParent returns the top-level seqalign associated with a given
*  indexed alignment.
*
*  Ownership: the return value is the actual pointer, not a copy, so the
*  caller must not treat it as a separately owned object.
*
***************************************************************************/
NLM_EXTERN SeqAlignPtr AlnMgr2GetParent(SeqAlignPtr sap);

/***************************************************************************
*
*  SECTION 5: Functions to change, assign or retrieve an anchor row.
*    SECTION 5a: functions for child seqaligns
*    SECTION 5b: functions for parent seqaligns
*    SECTION 5c: functions to retrieve anchor row information
*
***************************************************************************/
/* SECTION 5b */

/***************************************************************************
*
*  AlnMgr2AnchorSeqAlign takes an indexed seqalign and a row and reindexes
*  the alignment so that the indicated row contains no gaps.
*
*  which_row: 1-based number of the row to anchor on.
*
*  After this operation other rows may contain inserts, and the alignment
*  length often shrinks.
*
***************************************************************************/
NLM_EXTERN void AlnMgr2AnchorSeqAlign(SeqAlignPtr sap, Int4 which_row);

/* SECTION 5c */
/***************************************************************************
*
*  AlnMgr2FindAnchor returns the row number (1-based) of the anchor row
*  of an indexed seqalign. It returns -1 if the alignment is unanchored or
*  if any other error occurs, so -1 alone does not distinguish the two
*  cases.
*
***************************************************************************/
NLM_EXTERN Int4 AlnMgr2FindAnchor(SeqAlignPtr sap);

/***************************************************************************
*
*  SECTION 6: Functions for coordinate conversion (bioseq to seqalign
*  coordinates and vice versa)
*
***************************************************************************/

/***************************************************************************
*
*  AlnMgr2MapBioseqToSeqAlign maps a position in one row of an indexed
*  seqalign to alignment coordinates.
*
*  pos: position in the row (bioseq coordinate).
*  row: 1-based row number.
*
*  NOTE(review): the value returned on error is not stated here; the
*  sibling mapping functions use -1 -- confirm in the implementation.
*
***************************************************************************/
NLM_EXTERN Int4 AlnMgr2MapBioseqToSeqAlign(SeqAlignPtr sap, Int4 pos, Int4 row);

/***************************************************************************
*
*  AlnMgr2MapSeqAlignToBioseq maps an alignment coordinate of an indexed
*  seqalign to the corresponding bioseq coordinate of the desired row.
*
*  pos: alignment coordinate.
*  row: 1-based row number.
*  returns: the bioseq coordinate, or
*           -1 on error,
*           -2 if the bioseq is gapped at this alignment position.
*
***************************************************************************/
NLM_EXTERN Int4 AlnMgr2MapSeqAlignToBioseq(SeqAlignPtr sap, Int4 pos, Int4 row);

/***************************************************************************
*
*  AlnMgr2MapRowToRow maps a bioseq coordinate in row1 of an indexed
*  seqalign to the corresponding (aligned) bioseq coordinate in row2.
*
*  pos:  position in row1 (bioseq coordinate).
*  row1: 1-based number of the source row.
*  row2: target row.
*  returns: the bioseq coordinate in row2, or
*           -1 on error,
*           -2 if the bioseq in row2 is gapped at the desired position.
*
***************************************************************************/
NLM_EXTERN Int4 AlnMgr2MapRowToRow(SeqAlignPtr sap, Int4 pos, Int4 row1, Int4 row2);

/***************************************************************************
*
*  SECTION 7: Functions to change an alignment and retrieve parts of an
*    alignment
*
***************************************************************************/
/***************************************************************************
*
*   AlnMgr2TruncateSeqAlign truncates a given seqalign so that it contains
*   only the bioseq coordinates from start to stop on the indicated row.
*
*   - Anything before those coordinates is discarded.
*   - Anything remaining after them is made into another seqalign and put
*     in sap->next (the original next, if any, is then at
*     sap->next->next).
*
*   Doesn't work on parent seqaligns.
*   The function returns TRUE if the original alignment extended past stop.
*
***************************************************************************/
NLM_EXTERN Boolean AlnMgr2TruncateSeqAlign(SeqAlignPtr sap, Int4 start, Int4 stop, Int4 row);

/***************************************************************************
*
*  AlnMgr2GetSubAlign retrieves a portion of an indexed alignment.
*
*  from, to: the range to extract. They are coordinates of the row given
*            by 'row'; if row is 0 they are taken to be alignment
*            coordinates. If 'to' is -1, the subalignment runs to the end
*            of the specified row (or to the end of the whole alignment).
*  row:      row whose coordinates 'from' and 'to' refer to, or 0.
*
*  NOTE(review): ownership of the returned seqalign is not stated here;
*  presumably it is newly allocated -- confirm in the implementation.
*
***************************************************************************/
NLM_EXTERN SeqAlignPtr AlnMgr2GetSubAlign(SeqAlignPtr sap, Int4 from, Int4 to, Int4 row);

/***************************************************************************
*
*  SECTION 8: Miscellaneous functions to compute useful information
*    about an alignment
*
***************************************************************************/
/***************************************************************************
*
*  AlnMgr2ComputeScoreForSeqAlign computes an ad hoc numerical score for
*  an indexed alignment in two steps:
*
*  1. a similarity score for the whole alignment, residue pair by residue
*     pair (from a matrix for proteins, identity for nucleotides);
*  2. minus gap open and gap extension penalties.
*
*  NOTE(review): the penalties are presumably AM_GAPOPEN and AM_GAPEXT --
*  confirm in the implementation.
*
***************************************************************************/
NLM_EXTERN Int4 AlnMgr2ComputeScoreForSeqAlign(SeqAlignPtr sap);

/***************************************************************************
*
*  AlnMgr2ComputeFreqMatrix takes an indexed seqalign and returns a matrix
*  giving the nucleotide or amino acid frequency at each position of the
*  alignment.
*
*  from, to: if nonzero, restrict the matrix to that part of the
*            alignment.
*  row:      if nonzero, from and to are bioseq coordinates of that row;
*            if 0, they are alignment coordinates.
*
*  NOTE(review): the returned AMFreq is presumably owned by the caller and
*  released with AMFreqFree -- confirm in the implementation.
*
***************************************************************************/
NLM_EXTERN AMFreqPtr AlnMgr2ComputeFreqMatrix(SeqAlignPtr sap, Int4 from, Int4 to, Int4 row);

/***************************************************************************
*
*  AlnMgr2IsItProtein takes an indexed alignment and quickly decides
*  whether it is a protein or a nucleotide alignment. It returns TRUE for
*  protein.
*
***************************************************************************/
NLM_EXTERN Boolean AlnMgr2IsItProtein(SeqAlignPtr sap);

/***************************************************************************
*
*  SECTION 9: Sorting functions and other algorithms to help order
*  alignments for various purposes
*
***************************************************************************/
/***************************************************************************
*
*  AlnMgr2SortAlnSetByNthRowPos takes an indexed parent alignment and sorts
*  all of its child alignments along the row indicated.
*
*  Sort order: if the indicated row is aligned on the plus strand, the
*  children are sorted from smaller to larger coordinates along that row;
*  otherwise they are sorted in reverse order.
*
***************************************************************************/
NLM_EXTERN void AlnMgr2SortAlnSetByNthRowPos(SeqAlignPtr sap, Int4 row);

/***************************************************************************
*
*  SECTION 10: Basic alignment operations
*
***************************************************************************/

/***************************************************************************
*
*  AlnMgr2MergeTwoAlignments takes two alignments and merges them into a
*  single, newly allocated alignment, which it returns.
*
*  Requirements:
*  - Both alignments must have identical rows in the same order;
*    otherwise they are rejected.
*  - If there is unaligned space between the two alignments, it must be
*    the same length for every row. In that case the function aligns
*    those sequences; if the lengths differ the alignments are rejected.
*
*  NOTE(review): the value returned on rejection is not stated here;
*  presumably NULL -- confirm in the implementation.
*
***************************************************************************/
NLM_EXTERN SeqAlignPtr AlnMgr2MergeTwoAlignments(SeqAlignPtr sap1_orig, SeqAlignPtr sap2_orig);

/***************************************************************************
*
*  AlnMgr2ExtendToCoords takes an indexed child seqalign and blindly
*  extends it to the coordinates specified on the given row.
*
*  from, to: target coordinates on 'row'. If to == -1 the extension goes
*            to the end of the sequence specified.
*  row:      the row whose coordinates from and to refer to.
*
*  If other rows are too short to allow the full extension, the alignment
*  is extended as far as possible.
*
***************************************************************************/
NLM_EXTERN void AlnMgr2ExtendToCoords(SeqAlignPtr sap, Int4 from, Int4 to, Int4 row);

/***************************************************************************
*
*  AlnMgr2PadConservatively extends an alignment so that the whole of
*  every sequence is included. If two sequences have tails on the same
*  side, each tail is aligned against columns of all gaps:
*
*   <-new aln region->
*   xxxxxxxx----------xxxxxxxxxxxxxxxxxxxx
*   --------xxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
*
*  This function returns a newly allocated alignment and doesn't change
*  the original (except for indexing). If the extension was not done for
*  some reason, the function returns NULL.
*
***************************************************************************/
NLM_EXTERN SeqAlignPtr AlnMgr2PadConservatively(SeqAlignPtr sap);

/***************************************************************************/


#ifdef __cplusplus
}
#endif

#undef NLM_EXTERN
#ifdef NLM_EXPORT
#define NLM_EXTERN NLM_EXPORT
#else
#define NLM_EXTERN
#endif

#endif
