/* Copyright (C) 2003 Peter J. Verveer
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met: 
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above
 *    copyright notice, this list of conditions and the following
 *    disclaimer in the documentation and/or other materials provided
 *    with the distribution.
 *
 * 3. The name of the author may not be used to endorse or promote
 *    products derived from this software without specific prior
 *    written permission.
 * 
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
 * GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.      
 */

#include "ni_support.h"
#include "ni_filters.h"
#include <stdlib.h>
#include <math.h>
#include <assert.h>

#include "numconfig.h"

/* Size limit handed to NI_AllocateLineBuffer whenever the 1D filters in this
 * file allocate their line buffers.
 * NOTE(review): presumably a byte count - confirm against
 * NI_AllocateLineBuffer. */
#define BUFFER_SIZE 256000

/* Correlate `input` with a one-dimensional kernel along a single axis.
 *
 *   weights      kernel coefficients (filter_size entries)
 *   filter_size  kernel length, must be >= 1
 *   axis         axis to filter along; a negative value has the array rank
 *                added to it
 *   output       receives the array produced by NI_OutputArray
 *   output_in    forwarded unchanged to NI_OutputArray
 *   mode, cval   forwarded to NI_InitLineBuffer for the input lines
 *   shift        displacement of the kernel origin; filter_size / 2 + shift
 *                must lie in [0, filter_size)
 *   output_type  element type of the result; tAny selects the input type
 *
 * Odd-length kernels that are symmetric or anti-symmetric (to within
 * DBL_EPSILON) are evaluated with a folded loop that does half the
 * multiplications.
 *
 * Returns 1 on success, 0 when a Python exception is pending.
 */
int NI_Correlate1D(PyArrayObject *input, double *weights, int filter_size,
                   int axis, PyArrayObject **output,
                   PyObject *output_in, NI_ExtendMode mode, double cval,
                   int shift, NumarrayType output_type)
{
  /* parity: +1 symmetric kernel, -1 anti-symmetric kernel, 0 neither */
  int parity = 0;
  int rank, in_type, before, after, n_lines, line_len, has_more;
  int line, pos, tap;
  int dims[NI_MAXDIM];
  double *in_mem = NULL, *out_mem = NULL, *center;
  NI_LineBuffer in_lines, out_lines;

  assert(input != NULL);
  assert(output != NULL);
  assert(weights != NULL);

  /* an empty kernel is rejected: */
  if (filter_size < 1) {
    PyErr_SetString(PyExc_RuntimeError, "filter size must be > 0");
    goto exit;
  }

  /* negative axes are offset by the rank: */
  rank = NI_GetArrayRank(input);
  if (axis < 0)
    axis += rank;

  in_type = NI_GetArrayType(input);
  if (rank > 0 && (axis >= rank || axis < 0)) {
    PyErr_SetString(PyExc_RuntimeError, "invalid axis specified");
    goto exit;
  }

  /* neither input nor output may be complex: */
  if (in_type == tComplex32 || in_type == tComplex64 ||
      output_type == tComplex32 || output_type == tComplex64) {
    PyErr_SetString(PyExc_RuntimeError, "complex arrays not supported");
    goto exit;
  }

  /* default the output type to the input type: */
  if (output_type == tAny)
    output_type = (NumarrayType)in_type;

  /* `before` taps precede the kernel centre, `after` taps follow it;
     filter_size >= 1 is already guaranteed at this point: */
  before = filter_size / 2;
  after = filter_size - before - 1;
  if (before + shift < 0 || before + shift >= filter_size) {
    PyErr_SetString(PyExc_RuntimeError, "shift not within filter extent");
    goto exit;
  }

  /* only odd-length kernels are examined for (anti-)symmetry: */
  if (filter_size % 2 != 0) {
    parity = 1;
    for(tap = 1; tap <= before; tap++) {
      if (fabs(weights[before + tap] - weights[before - tap]) > DBL_EPSILON) {
        parity = 0;
        break;
      }
    }
    if (parity == 0) {
      parity = -1;
      for(tap = 1; tap <= before; tap++) {
        if (fabs(weights[before + tap] + weights[before - tap]) > DBL_EPSILON) {
          parity = 0;
          break;
        }
      }
    }
  }

  /* create the output array with the shape of the input: */
  NI_GetArrayDimensions(input, dims);
  if (!NI_OutputArray(output_type, rank, dims, output_in, output))
    goto exit;

  /* set up the input and output line buffers: */
  n_lines = -1;
  if (!NI_AllocateLineBuffer(input, axis, before + shift, after - shift,
                             &n_lines, BUFFER_SIZE, &in_mem))
    goto exit;

  if (!NI_AllocateLineBuffer(*output, axis, 0, 0, &n_lines, BUFFER_SIZE,
                             &out_mem))
    goto exit;

  if (!NI_InitLineBuffer(input, axis, before + shift, after - shift, n_lines,
                         in_mem, mode, cval, &in_lines))
    goto exit;

  if (!NI_InitLineBuffer(*output, axis, 0, 0, n_lines, out_mem, mode, 0.0,
                         &out_lines))
    goto exit;

  line_len = rank > 0 ? dims[axis] : 1;
  /* index the kernel relative to its centre tap: */
  center = weights + before;

  /* process the array one batch of lines at a time: */
  do {
    /* fill the input buffer from the array: */
    if (!NI_ArrayToLineBuffer(&in_lines, &n_lines, &has_more))
      goto exit;

    for(line = 0; line < n_lines; line++) {
      double *src = NI_GET_LINE(in_lines, line) + before;
      double *dst = NI_GET_LINE(out_lines, line);

      if (parity > 0) {
        /* symmetric kernel: fold mirrored samples together */
        for(pos = 0; pos < line_len; pos++, src++) {
          dst[pos] = src[0] * center[0];
          for(tap = -before; tap < 0; tap++)
            dst[pos] += (src[tap] + src[-tap]) * center[tap];
        }
      } else if (parity < 0) {
        /* anti-symmetric kernel: mirrored samples are subtracted */
        for(pos = 0; pos < line_len; pos++, src++) {
          dst[pos] = src[0] * center[0];
          for(tap = -before; tap < 0; tap++)
            dst[pos] += (src[tap] - src[-tap]) * center[tap];
        }
      } else {
        /* general kernel: start with the last tap, then add the rest */
        for(pos = 0; pos < line_len; pos++, src++) {
          dst[pos] = src[after] * center[after];
          for(tap = -before; tap < after; tap++)
            dst[pos] += src[tap] * center[tap];
        }
      }
    }

    /* write the finished lines back to the output array: */
    if (!NI_LineBufferToArray(&out_lines))
      goto exit;
  } while(has_more);

 exit:
  if (in_mem) free(in_mem);
  if (out_mem) free(out_mem);
  return PyErr_Occurred() ? 0 : 1;
}


/* Add to _result the weighted sum of the _filter_size samples found at the
 * byte offsets _offsets[] from _pi, each read as _type and converted to
 * double.  An offset equal to _mv marks a sample for which _cvalue is used
 * instead of reading memory. */
#define NI_CORRELATE_POINT(_pi, _weights, _offsets, _filter_size,	\
			   _cvalue, _type, _result, _mv)		\
{									\
  int _kk;								\
  for(_kk = 0; _kk < _filter_size; _kk++) {				\
    const int _off = _offsets[_kk];					\
    if (_off != _mv)							\
      _result += _weights[_kk] * (double)*(_type*)(_pi + _off);		\
    else								\
      _result += _weights[_kk] * _cvalue;				\
  }									\
}


/* Helper for NI_Correlate only: one `case` of the input-type switch.  It
 * accumulates the weighted sum for the current point, reading the input
 * elements as type NAME. */
#define NI_CORRELATE_LOAD_CASE(NAME)					\
    case t ## NAME:							\
      NI_CORRELATE_POINT(in_ptr, packed, cur_off, n_taps, cvalue,	\
			 NAME, acc, border_flag);			\
      break

/* Helper for NI_Correlate only: one `case` of the output-type switch.  It
 * stores the accumulated value converted to type NAME. */
#define NI_CORRELATE_STORE_CASE(NAME)					\
    case t ## NAME:							\
      *(NAME*)out_ptr = (NAME)acc;					\
      break

/* N-dimensional correlation of `input` with `weights`, restricted to the
 * elements selected by the boolean `footprint`.
 *
 *   weights      contiguous Float64 array with the footprint's shape
 *   footprint    contiguous boolean array with the same rank as `input`
 *   output       receives the array produced by NI_OutputArray
 *   output_in    forwarded unchanged to NI_OutputArray
 *   mode         forwarded to NI_InitFilterOffsetsFromArray
 *   cvalue       value used for samples whose offset equals the border flag
 *   shifts       forwarded to the offset and filter-iterator initialisers
 *   output_type  element type of the result; tAny selects the input type
 *
 * Returns 1 on success, 0 when a Python exception is pending.
 */
int NI_Correlate(PyArrayObject* input, PyArrayObject* weights,
                 PyArrayObject* footprint, PyArrayObject** output,
                 PyObject* output_in, NI_ExtendMode mode, double cvalue,
                 int *shifts, NumarrayType output_type)
{
  int n_taps = 0, *offset_table = NULL, *cur_off;
  int n_points, n_mask, n_weights, border_flag, rank, in_type;
  int point, src, dst;
  int in_shape[NI_MAXDIM], w_shape[NI_MAXDIM];
  Bool *mask;
  NI_Iterator in_it, out_it;
  NI_FilterIterator filt_it;
  char *in_ptr, *out_ptr;
  Float64 *w_all;
  Float64 *packed = NULL;

  /* neither input nor output may be complex: */
  in_type = NI_GetArrayType(input);
  if (in_type == tComplex32 || in_type == tComplex64 ||
      output_type == tComplex32 || output_type == tComplex64) {
    PyErr_SetString(PyExc_RuntimeError, "complex arrays not supported");
    goto exit;
  }

  /* default the output type to the input type: */
  if (output_type == tAny)
    output_type = (NumarrayType)in_type;

  /* weights and footprint describe the same neighbourhood: */
  if (!NI_ShapeEqual(weights, footprint)) {
    PyErr_SetString(PyExc_RuntimeError,
                    "footprint and weights must have equal shape");
    goto exit;
  }

  if (NI_GetArrayType(footprint) != tBool) {
    PyErr_SetString(PyExc_RuntimeError, "footprint type must be boolean");
    goto exit;
  }

  if (NI_GetArrayType(weights) != tFloat64) {
    PyErr_SetString(PyExc_RuntimeError, "weights type must be Float64");
    goto exit;
  }

  /* the footprint rank has to match the input rank: */
  rank = NI_GetArrayRank(input);
  if (rank != NI_GetArrayRank(footprint)) {
    PyErr_SetString(PyExc_RuntimeError,
                    "weights and input arrays must have equal rank");
    goto exit;
  }

  /* both kernel arrays are walked as flat memory below: */
  if (!PyArray_ISCONTIGUOUS(weights)) {
    PyErr_SetString(PyExc_RuntimeError, "weights array must be contiguous");
    goto exit;
  }

  if (!PyArray_ISCONTIGUOUS(footprint)) {
    PyErr_SetString(PyExc_RuntimeError, "footprint array must be contiguous");
    goto exit;
  }

  /* count the selected footprint elements: */
  mask = (Bool*)NI_GetArrayData(footprint);
  n_mask = NI_Elements(footprint);
  for(src = 0; src < n_mask; src++) {
    if (mask[src])
      n_taps++;
  }

  /* create the output array with the shape of the input: */
  NI_GetArrayDimensions(input, in_shape);
  if (!NI_OutputArray(output_type, rank, in_shape, output_in, output))
    goto exit;

  /* an all-false footprint is rejected: */
  if (n_taps < 1) {
    PyErr_SetString(PyExc_RuntimeError, "filter size must be > 0");
    goto exit;
  }

  /* offsets of the selected elements, plus the border flag value: */
  if (!NI_InitFilterOffsetsFromArray(input, footprint, shifts, mode,
                                     &offset_table, &border_flag))
    goto exit;

  /* point iterators over input and output: */
  if (!NI_InitPointIterator(input, &in_it))
    goto exit;

  if (!NI_InitPointIterator(*output, &out_it))
    goto exit;

  /* iterator over the offset table: */
  NI_GetArrayDimensions(weights, w_shape);
  if (!NI_InitFilterIterator(rank, w_shape, n_taps, in_shape, shifts,
                             &filt_it))
    goto exit;

  /* raw data pointers and element counts: */
  in_ptr = NI_GetArrayData(input);
  out_ptr = NI_GetArrayData(*output);
  w_all = (Float64*)NI_GetArrayData(weights);
  n_points = NI_Elements(input);
  n_weights = NI_Elements(weights);

  packed = (double*)malloc(n_taps * sizeof(double));
  if (!packed) {
    PyErr_NoMemory();
    goto exit;
  }

  /* keep only the weights selected by the footprint, densely packed: */
  dst = 0;
  for(src = 0; src < n_weights; src++) {
    if (mask[src])
      packed[dst++] = w_all[src];
  }

  /* visit every element of the input: */
  cur_off = offset_table;
  for(point = 0; point < n_points; point++) {
    double acc = 0.0;

    /* weighted sum, reading the input in its own element type: */
    switch (in_type) {
      NI_CORRELATE_LOAD_CASE(Bool);
      NI_CORRELATE_LOAD_CASE(UInt8);
      NI_CORRELATE_LOAD_CASE(UInt16);
      NI_CORRELATE_LOAD_CASE(UInt32);
#if HAS_UINT64
      NI_CORRELATE_LOAD_CASE(UInt64);
#endif
      NI_CORRELATE_LOAD_CASE(Int8);
      NI_CORRELATE_LOAD_CASE(Int16);
      NI_CORRELATE_LOAD_CASE(Int32);
      NI_CORRELATE_LOAD_CASE(Int64);
      NI_CORRELATE_LOAD_CASE(Float32);
      NI_CORRELATE_LOAD_CASE(Float64);
    default:
      PyErr_SetString(PyExc_RuntimeError, "data type not supported");
      goto exit;
    }

    /* store the result, converted to the output element type: */
    switch (output_type) {
      NI_CORRELATE_STORE_CASE(Bool);
      NI_CORRELATE_STORE_CASE(UInt8);
      NI_CORRELATE_STORE_CASE(UInt16);
      NI_CORRELATE_STORE_CASE(UInt32);
#if HAS_UINT64
      NI_CORRELATE_STORE_CASE(UInt64);
#endif
      NI_CORRELATE_STORE_CASE(Int8);
      NI_CORRELATE_STORE_CASE(Int16);
      NI_CORRELATE_STORE_CASE(Int32);
      NI_CORRELATE_STORE_CASE(Int64);
      NI_CORRELATE_STORE_CASE(Float32);
      NI_CORRELATE_STORE_CASE(Float64);
    default:
      PyErr_SetString(PyExc_RuntimeError, "data type not supported");
      goto exit;
    }

    /* advance the iterators, the offset pointer and both data pointers: */
    NI_FILTER_NEXT2(filt_it, in_it, out_it, cur_off, in_ptr, out_ptr);
  }

 exit:
  if (offset_table) free(offset_table);
  if (packed) free(packed);
  return PyErr_Occurred() ? 0 : 1;
}

#undef NI_CORRELATE_LOAD_CASE
#undef NI_CORRELATE_STORE_CASE


/* Boxcar (moving average) filter of width `filter_size` along one axis.
 *
 *   filter_size  window width, must be >= 1
 *   axis         axis to filter along; a negative value has the array rank
 *                added to it
 *   output       receives the array produced by NI_OutputArray
 *   output_in    forwarded unchanged to NI_OutputArray
 *   mode, cvalue forwarded to NI_InitLineBuffer
 *   shift        displacement of the window origin; filter_size / 2 + shift
 *                must lie in [0, filter_size)
 *   output_type  element type of the result; tAny selects the input type
 *
 * The first output of each line is the mean of the first window; every
 * following output updates that mean with the sample entering and the
 * sample leaving the window.
 *
 * Returns 1 on success, 0 when a Python exception is pending.
 */
int NI_BoxcarFilter1D(PyArrayObject *input, int filter_size, int axis,
                      PyArrayObject** output, PyObject* output_in,
                      NI_ExtendMode mode, double cvalue, int shift,
                      NumarrayType output_type)
{
  int rank, in_type, before, after, n_lines, line_len, has_more;
  int line, pos;
  int dims[NI_MAXDIM];
  double mean, *in_mem = NULL, *out_mem = NULL;
  NI_LineBuffer in_lines, out_lines;

  /* an empty window is rejected: */
  if (filter_size < 1) {
    PyErr_SetString(PyExc_RuntimeError, "filter size must be > 0");
    goto exit;
  }

  /* negative axes are offset by the rank: */
  rank = NI_GetArrayRank(input);
  if (axis < 0)
    axis += rank;

  if (rank > 0 && (axis >= rank || axis < 0)) {
    PyErr_SetString(PyExc_RuntimeError, "invalid axis specified");
    goto exit;
  }

  /* neither input nor output may be complex: */
  in_type = NI_GetArrayType(input);
  if (in_type == tComplex32 || in_type == tComplex64 ||
      output_type == tComplex32 || output_type == tComplex64) {
    PyErr_SetString(PyExc_RuntimeError, "complex arrays not supported");
    goto exit;
  }

  /* default the output type to the input type: */
  if (output_type == tAny)
    output_type = (NumarrayType)in_type;

  /* create the output array with the shape of the input: */
  NI_GetArrayDimensions(input, dims);
  if (!NI_OutputArray(output_type, rank, dims, output_in, output))
    goto exit;

  /* window extent on either side of the origin; filter_size >= 1 is
     already guaranteed at this point: */
  before = filter_size / 2;
  after = filter_size - before - 1;
  if (before + shift < 0 || before + shift >= filter_size) {
    PyErr_SetString(PyExc_RuntimeError, "shift not within filter extent");
    goto exit;
  }

  /* set up the input and output line buffers: */
  n_lines = -1;
  if (!NI_AllocateLineBuffer(input, axis, before + shift, after - shift,
                             &n_lines, BUFFER_SIZE, &in_mem))
    goto exit;

  if (!NI_AllocateLineBuffer(*output, axis, 0, 0, &n_lines, BUFFER_SIZE,
                             &out_mem))
    goto exit;

  if (!NI_InitLineBuffer(input, axis, before + shift, after - shift,
                         n_lines, in_mem, mode, cvalue, &in_lines))
    goto exit;

  if (!NI_InitLineBuffer(*output, axis, 0, 0, n_lines, out_mem, mode,
                         cvalue, &out_lines))
    goto exit;

  line_len = rank > 0 ? dims[axis] : 1;

  /* process the array one batch of lines at a time: */
  do {
    /* fill the input buffer from the array: */
    if (!NI_ArrayToLineBuffer(&in_lines, &n_lines, &has_more))
      goto exit;

    for(line = 0; line < n_lines; line++) {
      double *src = NI_GET_LINE(in_lines, line);
      double *dst = NI_GET_LINE(out_lines, line);

      /* mean over the first window: */
      mean = 0.0;
      for(pos = 0; pos < filter_size; pos++)
        mean += src[pos];
      mean /= (double)filter_size;
      dst[0] = mean;

      /* slide the window: add the entering sample, drop the leaving one */
      for(pos = 1; pos < line_len; pos++) {
        mean += (src[pos + filter_size - 1] - src[pos - 1]) /
                (double)filter_size;
        dst[pos] = mean;
      }
    }

    /* write the finished lines back to the output array: */
    if (!NI_LineBufferToArray(&out_lines))
      goto exit;
  } while(has_more);

 exit:
  if (in_mem) free(in_mem);
  if (out_mem) free(out_mem);
  return PyErr_Occurred() ? 0 : 1;
}


/* Running minimum or maximum over a window of `filter_size` samples along
 * one axis.
 *
 *   filter_size  window width, must be >= 1
 *   axis         axis to filter along; a negative value has the array rank
 *                added to it
 *   output       receives the array produced by NI_OutputArray; it has the
 *                element type of the input
 *   output_in    forwarded unchanged to NI_OutputArray
 *   mode, cvalue forwarded to NI_InitLineBuffer
 *   shift        displacement of the window origin; filter_size / 2 + shift
 *                must lie in [0, filter_size)
 *   minimum      non-zero selects the minimum, zero the maximum
 *
 * Returns 1 on success, 0 when a Python exception is pending.
 */
int NI_MinimumMaximumFilter1D(PyArrayObject* input, int filter_size,
                              int axis, PyArrayObject** output,
                              PyObject* output_in, NI_ExtendMode mode,
                              double cvalue, int shift, char minimum)
{
  int rank, in_type, before, after, n_lines, line_len, has_more;
  int line, pos, tap;
  int dims[NI_MAXDIM];
  double *in_mem = NULL, *out_mem = NULL;
  NI_LineBuffer in_lines, out_lines;

  /* an empty window is rejected: */
  if (filter_size < 1) {
    PyErr_SetString(PyExc_RuntimeError, "filter size must be > 0");
    goto exit;
  }

  /* negative axes are offset by the rank: */
  rank = NI_GetArrayRank(input);
  if (axis < 0)
    axis += rank;

  if (rank > 0 && (axis >= rank || axis < 0)) {
    PyErr_SetString(PyExc_RuntimeError, "invalid axis specified");
    goto exit;
  }

  /* complex input is rejected: */
  in_type = NI_GetArrayType(input);
  if (in_type == tComplex32 || in_type == tComplex64) {
    PyErr_SetString(PyExc_RuntimeError, "complex arrays not supported");
    goto exit;
  }

  /* the output has the shape and element type of the input: */
  NI_GetArrayDimensions(input, dims);
  if (!NI_OutputArray((NumarrayType)in_type, rank, dims, output_in, output))
    goto exit;

  /* window extent on either side of the origin; filter_size >= 1 is
     already guaranteed at this point: */
  before = filter_size / 2;
  after = filter_size - before - 1;
  if (before + shift < 0 || before + shift >= filter_size) {
    PyErr_SetString(PyExc_RuntimeError, "shift not within filter extent");
    goto exit;
  }

  /* set up the input and output line buffers: */
  n_lines = -1;
  if (!NI_AllocateLineBuffer(input, axis, before + shift, after - shift,
                             &n_lines, BUFFER_SIZE, &in_mem))
    goto exit;
  if (!NI_AllocateLineBuffer(*output, axis, 0, 0, &n_lines, BUFFER_SIZE,
                             &out_mem))
    goto exit;
  if (!NI_InitLineBuffer(input, axis, before + shift, after - shift,
                         n_lines, in_mem, mode, cvalue, &in_lines))
    goto exit;
  if (!NI_InitLineBuffer(*output, axis, 0, 0, n_lines, out_mem, mode,
                         cvalue, &out_lines))
    goto exit;

  line_len = rank > 0 ? dims[axis] : 1;

  /* process the array one batch of lines at a time: */
  do {
    /* fill the input buffer from the array: */
    if (!NI_ArrayToLineBuffer(&in_lines, &n_lines, &has_more))
      goto exit;

    for(line = 0; line < n_lines; line++) {
      double *src = NI_GET_LINE(in_lines, line);
      double *dst = NI_GET_LINE(out_lines, line);

      for(pos = 0; pos < line_len; pos++) {
        /* the window for output `pos` is src[pos .. pos + filter_size - 1] */
        double *window = src + pos;
        double best = window[0];

        if (minimum) {
          for(tap = 1; tap < filter_size; tap++) {
            if (window[tap] < best)
              best = window[tap];
          }
        } else {
          for(tap = 1; tap < filter_size; tap++) {
            if (window[tap] > best)
              best = window[tap];
          }
        }
        dst[pos] = best;
      }
    }

    /* write the finished lines back to the output array: */
    if (!NI_LineBufferToArray(&out_lines))
      goto exit;
  } while(has_more);

 exit:
  if (in_mem) free(in_mem);
  if (out_mem) free(out_mem);
  return PyErr_Occurred() ? 0 : 1;
}

/* Write to _po (as _type) the minimum (_minimum non-zero) or maximum of the
 * _filter_size samples found at the byte offsets _offsets[] from _pi.  An
 * offset equal to _mv selects (_type)_cvalue instead of reading memory.
 * When _ss is non-null, _ss[k] is added to sample k (with the result
 * converted back to _type) before it is compared. */
#define NI_MIN_MAX_FILTER_POINT(_pi, _po, _offsets, _filter_size,	\
			        _cvalue, _type, _minimum, _mv, _ss)	\
{									\
  int _kk, _off;							\
  const _type _border = (_type)_cvalue;					\
  _type _cand, _best;							\
  _off = _offsets[0];							\
  if (_off == _mv)							\
    _best = _border;							\
  else									\
    _best = *(_type*)(_pi + _off);					\
  if (_ss)								\
    _best += _ss[0];							\
  for(_kk = 1; _kk < _filter_size; _kk++) {				\
    _off = _offsets[_kk];						\
    if (_off == _mv)							\
      _cand = _border;							\
    else								\
      _cand = *(_type*)(_pi + _off);					\
    if (_ss)								\
      _cand += _ss[_kk];						\
    if ((_minimum) ? _cand < _best : _cand > _best)			\
      _best = _cand;							\
  }									\
  *(_type*)_po = _best;							\
}


/* N-dimensional minimum or maximum filter over the neighbourhood selected
 * by the boolean `footprint`.
 *
 *   footprint  contiguous boolean array with the same rank as `input`
 *   structure  optional (may be NULL) contiguous Float64 array with the
 *              footprint's shape; its values at the selected positions are
 *              added to the samples before comparison, negated first when
 *              `minimum` is non-zero
 *   output     receives the array produced by NI_OutputArray; it has the
 *              element type of the input
 *   output_in  forwarded unchanged to NI_OutputArray
 *   mode       forwarded to NI_InitFilterOffsetsFromArray
 *   cvalue     value used for samples whose offset equals the border flag
 *   shifts     forwarded to the offset and filter-iterator initialisers
 *   minimum    non-zero selects the minimum, zero the maximum
 *
 * Returns 1 on success, 0 when a Python exception is pending.
 */
int NI_MinimumMaximumFilter(PyArrayObject* input, PyArrayObject* footprint,
                            PyArrayObject* structure, PyArrayObject** output,
                            PyObject* output_in,
                            NI_ExtendMode mode, double cvalue, int *shifts,
                            char minimum)
{
  int filter_size = 0, *offsets = NULL, size, *oo, jj, kk, border_flag_value;
  int msize, irank, itype, idims[NI_MAXDIM], fdims[NI_MAXDIM];
  Bool *pm;
  NI_Iterator ii, io;
  NI_FilterIterator fi;
  char *pi, *po;
  Float64 *ps = NULL;
  double *ss = NULL;

  /* complex types not supported: */
  itype = NI_GetArrayType(input);
  if (itype == tComplex32 || itype == tComplex64) {
    PyErr_SetString(PyExc_RuntimeError, "complex arrays not supported");
    goto exit;
  }

  irank = NI_GetArrayRank(input);

  /* input and footprint must have the same rank: */
  if (irank != NI_GetArrayRank(footprint)) {
    PyErr_SetString(PyExc_RuntimeError,
                    "footprint and input arrays must have equal rank");
    goto exit;
  }

  /* footprint must have bool type: */
  if (NI_GetArrayType(footprint) != tBool) {
    PyErr_SetString(PyExc_RuntimeError, "footprint type must be boolean");
    goto exit;
  }

  /* the footprint array must be contiguous: */
  if (!PyArray_ISCONTIGUOUS(footprint)) {
    PyErr_SetString(PyExc_RuntimeError, "footprint array must be contiguous");
    goto exit;
  }

  /* get the size of the footprint: */
  pm = (Bool*)NI_GetArrayData(footprint);
  msize = NI_Elements(footprint);
  for(jj = 0; jj < msize; jj++)
    if (pm[jj]) ++filter_size;

  if (structure) {
    if (!NI_ShapeEqual(structure, footprint)) {
      PyErr_SetString(PyExc_RuntimeError,
                      "footprint and structure must have equal shape");
      goto exit;
    }

    /* the structure must be of Float64 type: */
    if (NI_GetArrayType(structure) != tFloat64) {
      PyErr_SetString(PyExc_RuntimeError, "structure type must be Float64");
      goto exit;
    }

    /* the structure array must be contiguous: */
    if (!PyArray_ISCONTIGUOUS(structure)) {
      PyErr_SetString(PyExc_RuntimeError,
                      "structure array must be contiguous");
      goto exit;
    }

    /* An empty footprint is rejected further down.  Skip the allocation in
       that case: malloc(0) is allowed to return NULL, which would otherwise
       be reported as an out-of-memory error instead of the intended
       "filter size must be > 0". */
    if (filter_size > 0) {
      ps = (Float64*)NI_GetArrayData(structure);
      ss = (double*)malloc(filter_size * sizeof(double));
      if (!ss) {
        PyErr_NoMemory();
        goto exit;
      }

      /* copy the selected structure values to contiguous memory; they are
         negated for the minimum filter: */
      jj = 0;
      for(kk = 0; kk < msize; kk++)
        if (pm[kk])
          ss[jj++] = minimum ? -ps[kk] : ps[kk];
    }
  }

  /* allocate the output array: */
  NI_GetArrayDimensions(input, idims);
  if (!NI_OutputArray((NumarrayType)itype, irank, idims, output_in, output))
    goto exit;

  /* filter must not be empty: */
  if (filter_size < 1) {
    PyErr_SetString(PyExc_RuntimeError, "filter size must be > 0");
    goto exit;
  }

  /* calculate the filter offsets: */
  if (!NI_InitFilterOffsetsFromArray(input, footprint, shifts, mode, &offsets,
                                     &border_flag_value))
    goto exit;

  /* initialize input element iterator: */
  if (!NI_InitPointIterator(input, &ii))
    goto exit;

  /* initialize output element iterator: */
  if (!NI_InitPointIterator(*output, &io))
    goto exit;

  /* initialize filter iterator: */
  NI_GetArrayDimensions(footprint, fdims);
  if (!NI_InitFilterIterator(irank, fdims, filter_size, idims, shifts, &fi))
    goto exit;

  /* get data pointers and size: */
  pi = NI_GetArrayData(input);
  po = NI_GetArrayData(*output);
  size = NI_Elements(input);

  /* iterate over the elements: */
  oo = offsets;
  for(jj = 0; jj < size; jj++) {
    switch (itype) {
    case tBool:
      NI_MIN_MAX_FILTER_POINT(pi, po, oo, filter_size, cvalue, Bool,
                              minimum, border_flag_value, ss);
      break;
    case tUInt8:
      NI_MIN_MAX_FILTER_POINT(pi, po, oo, filter_size, cvalue, UInt8,
                              minimum, border_flag_value, ss);
      break;
    case tUInt16:
      NI_MIN_MAX_FILTER_POINT(pi, po, oo, filter_size, cvalue, UInt16,
                              minimum, border_flag_value, ss);
      break;
    case tUInt32:
      NI_MIN_MAX_FILTER_POINT(pi, po, oo, filter_size, cvalue, UInt32,
                              minimum, border_flag_value, ss);
      break;
#if HAS_UINT64
    case tUInt64:
      NI_MIN_MAX_FILTER_POINT(pi, po, oo, filter_size, cvalue, UInt64,
                              minimum, border_flag_value, ss);
      break;
#endif
    case tInt8:
      NI_MIN_MAX_FILTER_POINT(pi, po, oo, filter_size, cvalue, Int8,
                              minimum, border_flag_value, ss);
      break;
    case tInt16:
      NI_MIN_MAX_FILTER_POINT(pi, po, oo, filter_size, cvalue, Int16,
                              minimum, border_flag_value, ss);
      break;
    case tInt32:
      NI_MIN_MAX_FILTER_POINT(pi, po, oo, filter_size, cvalue, Int32,
                              minimum, border_flag_value, ss);
      break;
    case tInt64:
      NI_MIN_MAX_FILTER_POINT(pi, po, oo, filter_size, cvalue, Int64,
                              minimum, border_flag_value, ss);
      break;
    case tFloat32:
      NI_MIN_MAX_FILTER_POINT(pi, po, oo, filter_size, cvalue, Float32,
                              minimum, border_flag_value, ss);
      break;
    case tFloat64:
      NI_MIN_MAX_FILTER_POINT(pi, po, oo, filter_size, cvalue, Float64,
                              minimum, border_flag_value, ss);
      break;
    default:
      PyErr_SetString(PyExc_RuntimeError, "data type not supported");
      goto exit;
    }
    /* advance the iterators, the offset pointer and both data pointers: */
    NI_FILTER_NEXT2(fi, ii, io, oo, pi, po);
  }

 exit:
  if (offsets)
    free(offsets);
  if (ss)
    free(ss);
  return PyErr_Occurred() ? 0 : 1;
}

/* Return the element that would sit at zero-based position `rank` within
 * buffer[min..max] (inclusive) if that range were sorted ascending.  The
 * range is partially reordered in place.  Each pass partitions the current
 * range around its first element and then narrows the range to the side
 * that contains the requested rank. */
static double NI_Select(double *buffer, int min, int max, int rank)
{
  int lo, hi, left_count;
  double pivot, held;

  while (min != max) {
    pivot = buffer[min];
    lo = min - 1;
    hi = max + 1;

    /* move the two cursors towards each other, swapping misplaced pairs: */
    for(;;) {
      do {
        hi--;
      } while(buffer[hi] > pivot);
      do {
        lo++;
      } while(buffer[lo] < pivot);
      if (lo >= hi)
        break;
      held = buffer[lo];
      buffer[lo] = buffer[hi];
      buffer[hi] = held;
    }

    /* buffer[min..hi] now forms the lower part: */
    left_count = hi - min + 1;
    if (rank < left_count) {
      max = hi;
    } else {
      min = hi + 1;
      rank -= left_count;
    }
  }

  return buffer[min];
}

/* Gather into _buffer the _filter_size samples found at the byte offsets
 * _offsets[] from _pi (each read as _type; an offset equal to _mv selects
 * (_type)_cvalue instead), then write to _po, as _type, the value that
 * NI_Select returns for position _rank of the gathered samples. */
#define NI_RANK_FILTER_POINT(_pi, _po, _offsets, _filter_size,		 \
			     _cvalue, _type, _buffer, _rank, _mv)	 \
{									 \
  int _kk;								 \
  for(_kk = 0; _kk < _filter_size; _kk++) {				 \
    const int _off = _offsets[_kk];					 \
    if (_off != _mv)							 \
      _buffer[_kk] = *(_type*)(_pi + _off);				 \
    else								 \
      _buffer[_kk] = (_type)_cvalue;					 \
  }									 \
  *(_type*)_po = (_type) NI_Select(_buffer, 0, _filter_size - 1, _rank); \
}

/* N-dimensional rank or percentile filter over the neighbourhood selected
 * by the boolean `footprint`.
 *
 *   filter_rank        zero-based rank to select when `percentile_filter`
 *                      is zero; a negative value has the number of selected
 *                      footprint elements added to it
 *   percentile         percentile in [0, 100] used when `percentile_filter`
 *                      is non-zero; a negative value has 100 added to it.
 *                      It is converted to a rank as
 *                      (int)(filter_size * percentile / 100), limited to
 *                      the last valid rank
 *   footprint          contiguous boolean array with the same rank as
 *                      `input`
 *   output             receives the array produced by NI_OutputArray; it
 *                      has the element type of the input
 *   output_in          forwarded unchanged to NI_OutputArray
 *   mode               forwarded to NI_InitFilterOffsetsFromArray
 *   cvalue             value used for samples whose offset equals the
 *                      border flag
 *   shifts             forwarded to the offset and filter-iterator
 *                      initialisers
 *   percentile_filter  non-zero selects percentile mode
 *
 * Returns 1 on success, 0 when a Python exception is pending.
 */
int NI_RankFilter(PyArrayObject* input, int filter_rank,
                    double percentile, PyArrayObject* footprint,
                    PyArrayObject** output, PyObject* output_in,
                    NI_ExtendMode mode, double cvalue, int *shifts,
                    char percentile_filter)
{
  int filter_size = 0, *offsets = NULL, size, *oo, jj, border_flag_value;
  int msize, irank, itype, idims[NI_MAXDIM], fdims[NI_MAXDIM];
  NI_Iterator ii, io;
  NI_FilterIterator fi;
  Bool *pm;
  char *pi, *po;
  double *buffer = NULL;

  /* complex types not supported: */
  itype = NI_GetArrayType(input);
  if (itype == tComplex32 || itype == tComplex64) {
    PyErr_SetString(PyExc_RuntimeError, "complex arrays not supported");
    goto exit;
  }

  irank = NI_GetArrayRank(input);

  /* input and footprint must have the same rank: */
  if (irank != NI_GetArrayRank(footprint)) {
    PyErr_SetString(PyExc_RuntimeError,
                    "footprint and input arrays must have equal rank");
    goto exit;
  }

  /* footprint must have bool type: */
  if (NI_GetArrayType(footprint) != tBool) {
    PyErr_SetString(PyExc_RuntimeError, "footprint type must be boolean");
    goto exit;
  }

  /* the footprint array must be contiguous: */
  if (!PyArray_ISCONTIGUOUS(footprint)) {
    PyErr_SetString(PyExc_RuntimeError, "footprint array must be contiguous");
    goto exit;
  }

  /* get the size of the footprint: */
  pm = (Bool*)NI_GetArrayData(footprint);
  msize = NI_Elements(footprint);
  for(jj = 0; jj < msize; jj++)
    if (pm[jj]) ++filter_size;

  /* allocate the output array: */
  NI_GetArrayDimensions(input, idims);
  if (!NI_OutputArray((NumarrayType)itype, irank,  idims, output_in, output))
    goto exit;

  /* filter must not be empty: */
  if (filter_size < 1) {
    PyErr_SetString(PyExc_RuntimeError, "filter size must be > 0");
    goto exit;
  }

  if (percentile_filter) {
    /* allow negative percentiles: */
    if (percentile < 0.0)
      percentile += 100.0;
    /* check percentile; the test is written so that NaN is rejected as
       well, because converting NaN to int below would be undefined: */
    if (!(percentile >= 0.0 && percentile <= 100.0)) {
      PyErr_SetString(PyExc_RuntimeError, "specified percentile not valid");
      goto exit;
    }
    /* calculate corresponding rank: */
    filter_rank = (int)((double)filter_size * percentile / 100.0);
    /* a percentile of exactly 100 maps to filter_size, one past the last
       valid rank; select the largest element explicitly instead of
       handing NI_Select a rank outside [0, filter_size): */
    if (filter_rank >= filter_size)
      filter_rank = filter_size - 1;
  } else {
    /* allow negative rank: */
    if (filter_rank < 0)
      filter_rank += filter_size;
    /* check rank: */
    if (filter_rank < 0 || filter_rank >= filter_size) {
      PyErr_SetString(PyExc_RuntimeError, "specified rank not valid");
      goto exit;
    }
  }

  /* calculate the filter offsets: */
  if (!NI_InitFilterOffsetsFromArray(input, footprint, shifts, mode, &offsets,
                                     &border_flag_value))
    goto exit;

  /* initialize input element iterator: */
  if (!NI_InitPointIterator(input, &ii))
    goto exit;

  /* initialize output element iterator: */
  if (!NI_InitPointIterator(*output, &io))
    goto exit;

  /* initialize filter iterator: */
  NI_GetArrayDimensions(footprint, fdims);
  if (!NI_InitFilterIterator(irank, fdims, filter_size, idims, shifts, &fi))
    goto exit;

  /* get data pointers and size: */
  pi = NI_GetArrayData(input);
  po = NI_GetArrayData(*output);
  size = NI_Elements(input);

  /* scratch buffer holding one neighbourhood for the rank selection: */
  buffer = (double*)malloc(filter_size * sizeof(double));
  if (!buffer) {
    PyErr_NoMemory();
    goto exit;
  }

  /* iterate over the elements: */
  oo = offsets;
  for(jj = 0; jj < size; jj++) {
    switch (itype) {
    case tBool:
      NI_RANK_FILTER_POINT(pi, po, oo, filter_size, cvalue, Bool,
                           buffer, filter_rank, border_flag_value);
      break;
    case tUInt8:
      NI_RANK_FILTER_POINT(pi, po, oo, filter_size, cvalue, UInt8,
                           buffer, filter_rank, border_flag_value);
      break;
    case tUInt16:
      NI_RANK_FILTER_POINT(pi, po, oo, filter_size, cvalue, UInt16,
                           buffer, filter_rank, border_flag_value);
      break;
    case tUInt32:
      NI_RANK_FILTER_POINT(pi, po, oo, filter_size, cvalue, UInt32,
                           buffer, filter_rank, border_flag_value);
      break;
#if HAS_UINT64
    case tUInt64:
      NI_RANK_FILTER_POINT(pi, po, oo, filter_size, cvalue, UInt64,
                           buffer, filter_rank, border_flag_value);
      break;
#endif
    case tInt8:
      NI_RANK_FILTER_POINT(pi, po, oo, filter_size, cvalue, Int8,
                           buffer, filter_rank, border_flag_value);
      break;
    case tInt16:
      NI_RANK_FILTER_POINT(pi, po, oo, filter_size, cvalue, Int16,
                           buffer, filter_rank, border_flag_value);
      break;
    case tInt32:
      NI_RANK_FILTER_POINT(pi, po, oo, filter_size, cvalue, Int32,
                           buffer, filter_rank, border_flag_value);
      break;
    case tInt64:
      NI_RANK_FILTER_POINT(pi, po, oo, filter_size, cvalue, Int64,
                           buffer, filter_rank, border_flag_value);
      break;
    case tFloat32:
      NI_RANK_FILTER_POINT(pi, po, oo, filter_size, cvalue, Float32,
                           buffer, filter_rank, border_flag_value);
      break;
    case tFloat64:
      NI_RANK_FILTER_POINT(pi, po, oo, filter_size, cvalue, Float64,
                           buffer, filter_rank, border_flag_value);
      break;
    default:
      PyErr_SetString(PyExc_RuntimeError, "data type not supported");
      goto exit;
    }
    /* advance the iterators, the offset pointer and both data pointers: */
    NI_FILTER_NEXT2(fi, ii, io, oo, pi, po);
  }

 exit:
  if (offsets) free(offsets);
  if (buffer) free(buffer);
  return PyErr_Occurred() ? 0 : 1;
}

