/*
 * Generate a fixed-size FFT algorithm.
 *
 * $Date: 1997/06/09 19:24:39 $
 */

#include <stdio.h>

#include "complex.h"
#include "fft.h"
#include "util.h"

#define CPLX      "(%23.20" LFMT "f%+24.20" LFMT "fi)"
#define ReIm(x)   Re(x), Im(x)

#define BODY(foo...)   fprintf(stdout, ## foo);

#if 0
#define DO_DEBUG(foo...) fprintf(stderr, ## foo)
#else
#define DO_DEBUG(foo...)
#endif
#define NO_DEBUG(foo...)

#if 0
#define DISP(t) fprintf(stderr, #t " = %" LFMT "f\n", t)
#else
#define DISP(t)
#endif

/*
 * Mirror the low-order bits of `num`.
 *
 * The number of bits mirrored is the number of times `max` can be halved
 * (after one initial halving) before reaching zero, i.e. log2(max) when
 * `max` is a power of two.  Bits of `num` above that width are ignored.
 *
 * Returns the mirrored value; 0 when `max` is 0 or 1.
 */
unsigned long bit_reverse(unsigned long num, unsigned long max)
{
  unsigned long mirrored = 0;
  unsigned long remaining = max >> 1;

  while (remaining != 0) {
    /* Move the lowest bit of num onto the low end of the result. */
    mirrored = (mirrored << 1) | (num & 1UL);
    num >>= 1;
    remaining >>= 1;
  }
  return mirrored;
}
/*
 * Shorthand for bit_reverse(x, nn).  `nn` is not a macro parameter: a
 * variable with that exact name must be in scope wherever R() is used.
 */
#define R(x)		bit_reverse(x, nn)

/*
 * Create an FFT context for size `nn` and fill its table of base roots.
 *
 * nn     transform size, stored in the context.
 * isign  sign applied to the rotation angle; also stored in the context,
 *        where it is informational only.
 *
 * Wbase gets one entry per significant bit of nn; entry i is
 * cos(theta) + i*sin(theta) with theta = isign * pi / 2^i.
 *
 * Both the context and the table are obtained from safe_malloc(); nothing
 * in this function releases them.
 */
FFT_Context init_fft2 (unsigned long nn, int isign)
{
  unsigned long first_set = 0, tmp, i;
  real theta;
  FFT_Context cont = safe_malloc(sizeof(struct FFT_Context_Data), "context");

  /* first_set = number of significant bits in nn (0 when nn is 0). */
  for (tmp = nn; tmp != 0; tmp >>= 1)
    first_set++;

  cont->nn = nn;
  cont->isign = isign;  /* informational only */

  cont->Wbase = safe_malloc(first_set * sizeof(complex), "Wbase");
  for (i = 0; i < first_set; i++) {
    /*
     * i can reach the bit width of unsigned long minus one, so the shift
     * must be done on an unsigned long: shifting the int constant 1 is
     * undefined once i reaches the width of int minus one.
     */
    theta = isign*M_PI/(1UL<<i);
    cont->Wbase[i] = (cos(theta) + sin(theta)*1i);
  }

  return cont;
}

/* things that print */

/*
 * Emit the preamble of the generated file:
 *   - a "DO NOT MODIFY" comment quoting the command line, i.e. every
 *     string of the NULL-terminated `argv`, separated by single spaces and
 *     wrapped in single quotes;
 *   - a comment naming the nn x nn grid size;
 *   - the #include lines the generated code needs.
 */
void gen_begin(unsigned long nn, char **argv)
{
  char **word;

  BODY("/* DO NOT MODIFY; automatically generated by ");
  for (word = argv; *word; word++) {
    /* The opening quote precedes the first word, a space every later one. */
    BODY("%c%s", (word == argv) ? '\'' : ' ', *word);
  }
  BODY("' */\n\n");

  BODY("/*\n * fixed-size 2D FFT algorithm, %lux%lu grid\n */\n\n",
       nn, nn);
  BODY("#include \"complex.h\"\n#include \"fft.h\"\n\n");
}

/*
 * Emit every butterfly of gen_ifft2() along one dimension of the grid.
 *
 * line_stride  index distance between successive lines (the dimension that
 *              is NOT being transformed)
 * elem_stride  index distance between successive elements within a line
 *
 * Block sizes run 1, 2, 4, ... while below nn, each taking the next entry
 * of fft->Wbase as its root.  For every offset inside a block the twiddle
 * factor is the running product of that root, starting from 1.
 */
static void ifft_emit_dimension(FFT_Context fft, unsigned long line_stride,
                                unsigned long elem_stride)
{
  unsigned long nn = fft->nn;
  unsigned long half, pos, base, line, lo, hi;
  complex *next_root = fft->Wbase;
  complex root, twiddle;

  for (half = 1; half < nn; half <<= 1) {
    root = *next_root++;
    twiddle = 1.0;
    for (pos = 0; pos < half; pos++) {
      for (base = 0; base < nn; base += 2*half) {
        for (line = 0; line < nn; line++) {
          lo = line_stride*line + elem_stride*(base + pos);
          hi = lo + elem_stride*half;
          BODY("  tempz = " CPLX " * data[%lu];\n", ReIm(twiddle), hi);
          BODY("  data[%lu]=data[%lu]-tempz;", hi, lo);
          BODY("  data[%lu] += tempz;\n", lo);
        }
      }
      twiddle *= root;
    }
  }
}

/*
 * Emit the C source of ifft_<nn>x<nn>(complex data[]): a fully unrolled
 * list of butterfly statements, first along the dimension whose elements
 * are adjacent (index nn*row + k), then along the dimension whose elements
 * are nn apart (index col + nn*k).  Twiddle factors are printed as literal
 * complex constants taken from the context's Wbase table.
 */
void gen_ifft2 (FFT_Context fft)
{
  unsigned long nn = fft->nn;

  BODY("void ifft_%lux%lu(complex data[])\n{\n  complex tempz;\n", nn, nn);

  BODY("\n  /* Danielson-Lanczos in the first dimension. */\n");
  ifft_emit_dimension(fft, nn, 1);

  BODY("\n  /* Danielson-Lanczos in the second dimension. */\n");
  ifft_emit_dimension(fft, 1, nn);

  BODY("}\n\n");
}

/*
 * Emit every butterfly of gen_hfft2() along one dimension of the grid.
 *
 * line_stride  index distance between successive lines (the dimension that
 *              is NOT being transformed)
 * elem_stride  index distance between successive elements within a line
 *
 * Block sizes run nn/2, nn/4, ..., 1, each taking the next entry of
 * fft->Wbase as its root.  Within one block size the groups are visited by
 * a plain counter; the element offset of a group is the bit-reversed
 * counter, R(group).  Because R() undoes itself on these values, this
 * visits the same offsets, in the same order, as starting from 0 and
 * repeatedly applying offset = R(1 + R(offset)) while R(offset) is below
 * R(block size).  The twiddle factor is the running product of the root,
 * advanced once per group.
 */
static void hfft_emit_dimension(FFT_Context fft, unsigned long line_stride,
                                unsigned long elem_stride)
{
  unsigned long nn = fft->nn;   /* R() expects this exact name in scope */
  unsigned long half, group, group_count, base, pos, line, lo, hi;
  complex *next_root = fft->Wbase;
  complex root, twiddle;

  for (half = nn >> 1; half > 0; half >>= 1) {
    root = *next_root++;
    twiddle = 1.0;
    group_count = R(half);
    for (group = 0; group < group_count; group++) {
      base = R(group);
      for (pos = 0; pos < half; pos++) {
        for (line = 0; line < nn; line++) {
          lo = line_stride*line + elem_stride*(pos + base);
          hi = lo + elem_stride*half;
          BODY("  tempz = " CPLX " * data[%lu];\n", ReIm(twiddle), hi);
          BODY("  data[%lu]=data[%lu]-tempz;", hi, lo);
          BODY("  data[%lu] += tempz;\n", lo);
        }
      }
      twiddle *= root;
    }
  }
}

/*
 * Emit the C source of hfft_<nn>x<nn>(complex data[]): a fully unrolled
 * list of butterfly statements, first along the dimension whose elements
 * are adjacent (index nn*row + k), then along the dimension whose elements
 * are nn apart (index col + nn*k).  Block sizes shrink from nn/2 to 1.
 */
void gen_hfft2 (FFT_Context fft)
{
  unsigned long nn = fft->nn;

  BODY("void hfft_%lux%lu(complex data[])\n{\n  complex tempz;\n", nn, nn);

  BODY("\n  /* Danielson-Lanczos in the first dimension. */\n");
  hfft_emit_dimension(fft, nn, 1);

  BODY("\n  /* Danielson-Lanczos in the second dimension. */\n");
  hfft_emit_dimension(fft, 1, nn);

  BODY("}\n\n");
}

/*
 * Advance a bit-reversed counter by one step.
 *
 * Works from the top bit (nn/2) downwards: every leading bit that is set
 * is cleared, and the first clear bit found is set -- an ordinary
 * increment with the bit significance mirrored.  When every bit was set
 * the counter wraps to 0.
 */
static unsigned long swap_next_reversed(unsigned long reversed,
                                        unsigned long nn)
{
  unsigned long bit;

  for (bit = nn >> 1; bit != 0 && reversed >= bit; bit >>= 1) {
    reversed -= bit;
  }
  return reversed + bit;
}

/*
 * Emit the SWAP helper macro and the C source of
 * swap_<nn>x<nn>(complex data[]).
 *
 * A counter and its bit-reversed twin are stepped together.  For each pair
 * with counter < twin, the generated code exchanges positions counter and
 * twin within every row (indices nn*row + k) and then exchanges the two
 * whole rows (indices nn*k + col).
 */
void gen_swap(unsigned long nn)
{
  unsigned long index, mirror, line;

  BODY("#define SWAP(a,b)  tempz=(a);(a)=(b);(b)=tempz\n\n");
  BODY("void swap_%lux%lu(complex data[])\n{\n  complex tempz;\n\n", nn, nn);

  mirror = 0;
  for (index = 0; index < nn; index++) {
    /* Handle each pair once: only when index is the smaller of the two. */
    if (index < mirror) {
      for (line = 0; line < nn; line++) {
        BODY("  SWAP(data[%lu],data[%lu]);\n", nn*line+index, nn*line+mirror);
      }
      for (line = 0; line < nn; line++) {
        BODY("  SWAP(data[%lu],data[%lu]);\n", nn*index+line, nn*mirror+line);
      }
    }
    mirror = swap_next_reversed(mirror, nn);
  }
  BODY("}\n\n");
}

/*
 * Report a fatal command-line problem and quit.
 *
 * Writes "<name>: <msg>" followed by a newline to stderr, then terminates
 * the process with exit status 1.  Never returns to the caller.
 */
void usage(char* name, char* msg)
{
  (void) fprintf(stderr, "%s: %s\n", name, msg);
  exit(1);
}

/*
 * Entry point: takes exactly one argument, a decimal power of two, and
 * writes the generated source for that grid size to stdout.
 *
 * Output order: preamble, hfft_NxN, ifft_NxN, swap_NxN, then the two
 * wrappers hfft_swap_NxN (transform, then swap) and swap_ifft_NxN (swap,
 * then transform).  Any argument error is reported through usage(), which
 * terminates the program.
 */
int main(int argc, char **argv)
{
  unsigned long size;
  char* endnum;
  FFT_Context nor_fft, inv_fft;

  if (argc != 2) {
    usage(argv[0],
          (argc < 2) ? "Argument needed." : "Only one argument required.");
  }

  size = strtoul(argv[1], &endnum, 10);
  if (endnum == argv[1] || *endnum != '\0') {
    usage(argv[0], "Argument is not a positive integer.");
  }
  /* A power of two is non-zero and has exactly one bit set. */
  if (size == 0 || (size & (size - 1)) != 0) {
    usage(argv[0], "Argument is not a power of 2.");
  }

  nor_fft = init_fft2(size, 1);
  inv_fft = init_fft2(size, -1);

  /* generate output */
  gen_begin(size, argv);
  gen_hfft2(nor_fft);
  gen_ifft2(inv_fft);
  gen_swap(size);

  /* Convenience wrappers that pair each transform with the swap. */
  BODY("void hfft_swap_%lux%lu(complex data[])\n{\n", size, size);
  BODY("  hfft_%lux%lu(data);\n  swap_%lux%lu(data);\n}\n\n",
       size, size, size, size);
  BODY("void swap_ifft_%lux%lu(complex data[])\n{\n", size, size);
  BODY("  swap_%lux%lu(data);\n  ifft_%lux%lu(data);\n}\n",
       size, size, size, size);

  return 0;
}
