#include <stdio.h>
#include <stdlib.h>

#include "fftc4.h"
#include "scalec4.h"
#include "multiplyc4.h"
#include "multiplyr4.h"
#include "fftc8.h"
#include "scalec8.h"
#include "multiplyc8.h"
#include "multiplyr8.h"

/*
 * Working buffers for the "4" tests.  fill4() loads x4 and y4 with
 * pseudo-random values and clears z4; doitr4()/doitc4() accumulate a
 * directly computed reference product into z4 and hand x4/y4 to the
 * transform callbacks; err4() then compares x4 against z4.
 * 8192 entries matches the largest size main() exercises (under BIG).
 */
complex4 x4[8192];
complex4 y4[8192];
complex4 z4[8192];

/*
 * Same roles as above for the "8" tests (fill8, doitr8, doitc8, err8).
 */
complex8 x8[8192];
complex8 y8[8192];
complex8 z8[8192];

/*
 * Prepare the "4" buffers for a test on the first n entries.
 *
 * x4 and y4 receive rand() values scaled by 1e-9; z4 is cleared.
 * The random draws are made in four separate passes (x4.re, x4.im,
 * y4.re, y4.im) so that the sequence of rand() results lands in the
 * same slots regardless of how the loops are written.
 *
 * n: number of leading entries to initialise; nothing is done for n <= 0.
 */
void fill4(int n)
{
  int k;

  k = 0;
  while (k < n) {
    x4[k].re = 0.000000001 * rand();
    ++k;
  }

  k = 0;
  while (k < n) {
    x4[k].im = 0.000000001 * rand();
    ++k;
  }

  k = 0;
  while (k < n) {
    y4[k].re = 0.000000001 * rand();
    ++k;
  }

  k = 0;
  while (k < n) {
    y4[k].im = 0.000000001 * rand();
    ++k;
  }

  /* Clearing the accumulator needs no particular order: one pass suffices. */
  for (k = 0;k < n;++k) {
    z4[k].re = 0;
    z4[k].im = 0;
  }
}

/*
 * Report how far x4 is from z4 over the first n entries.
 *
 * Accumulates, in real8 precision, the sum of squares of z4 and the sum
 * of squared differences between x4 and z4, then prints n and the ratio
 * of the two through the caller-supplied printf format.  A tiny constant
 * is added to the denominator so an all-zero z4 does not divide by zero.
 *
 * n:      number of leading entries to compare.
 * format: printf format consuming an int followed by a floating value.
 */
void err4(int n,char *format)
{
  real8 energy = 0;
  real8 mismatch = 0;
  int k;

  /* The two sums are independent, so a single pass keeps each one's
     accumulation order unchanged. */
  for (k = 0;k < n;++k) {
    energy += z4[k].re * (real8) z4[k].re;
    energy += z4[k].im * (real8) z4[k].im;
    mismatch += (x4[k].re - z4[k].re) * (real8) (x4[k].re - z4[k].re);
    mismatch += (x4[k].im - z4[k].im) * (real8) (x4[k].im - z4[k].im);
  }

  printf(format,n,mismatch / (energy + 0.0000000000000000000001));
}

/*
 * Run one "r4" check of size n.
 *
 * First builds a reference result in z4 with a direct double loop over
 * every (i, j) pair.  Each pair contributes
 *   re*re        to the real part of slot (i + j),
 *   re*im, im*re to the imaginary part of slot (i + j),
 *   im*im        to the real part of slot (i + j + 1),
 * with slot numbers wrapped by "& (n - 1)" (a modulo only when n is a
 * power of two, which is what main() passes).
 *
 * Then drives the supplied callbacks: fft on y4, scale y4 by 0.25/n,
 * fft on x4, multiply(x4, y4), unfft on x4, and finally prints the
 * x4-versus-z4 error via err4().
 *
 * n:        problem size.
 * fft:      callback applied to y4 and to x4 before multiplying.
 * unfft:    callback applied to x4 after multiplying.
 * multiply: callback invoked as multiply(x4, y4).
 */
void doitr4(int n,void (*fft)(),void (*unfft)(),void (*multiply)())
{
  int mask = n - 1;
  int a;
  int b;
  int here;
  int next;

  fill4(n);

  for (a = 0;a < n;++a) {
    for (b = 0;b < n;++b) {
      here = (a + b) & mask;
      next = (a + b + 1) & mask;
      z4[here].re += x4[a].re * y4[b].re;
      z4[here].im += x4[a].re * y4[b].im;
      z4[here].im += x4[a].im * y4[b].re;
      z4[next].re += x4[a].im * y4[b].im;
    }
  }

  fft(y4);
  scalec4(y4,n,0.25/n);
  fft(x4);
  multiply(x4,y4);
  unfft(x4);

  err4(n,"%d r4 %e\n");
}

/*
 * Run one "c4" check of size n.
 *
 * Builds a reference result in z4 by accumulating the complex product
 * x4[i] * y4[j] into slot (i + j) & (n - 1) for every (i, j) pair
 * (the wrap is a modulo only when n is a power of two, which is what
 * main() passes).
 *
 * Then drives the supplied callbacks: fft on y4, scale y4 by 1.0/n,
 * fft on x4, multiply(x4, y4), unfft on x4, and finally prints the
 * x4-versus-z4 error via err4().
 *
 * n:        problem size.
 * fft:      callback applied to y4 and to x4 before multiplying.
 * unfft:    callback applied to x4 after multiplying.
 * multiply: callback invoked as multiply(x4, y4).
 */
void doitc4(int n,void (*fft)(),void (*unfft)(),void (*multiply)())
{
  int i;
  int j;

  fill4(n);

  for (i = 0;i < n;++i)
    for (j = 0;j < n;++j) {
      z4[(i + j) & (n - 1)].re += x4[i].re * y4[j].re;
      z4[(i + j) & (n - 1)].im += x4[i].re * y4[j].im;
      z4[(i + j) & (n - 1)].im += x4[i].im * y4[j].re;
      z4[(i + j) & (n - 1)].re -= x4[i].im * y4[j].im;
    }

  fft(y4);
  scalec4(y4,n,1.0/n);
  fft(x4);
  multiply(x4,y4);
  unfft(x4);

  err4(n,"%d c4 %e\n");
}

/*
 * Prepare the "8" buffers for a test on the first n entries.
 *
 * x8 and y8 receive rand() values scaled by 1e-9; z8 is cleared.
 * The random draws are made in four separate passes (x8.re, x8.im,
 * y8.re, y8.im) so that the sequence of rand() results lands in the
 * same slots regardless of how the loops are written.
 *
 * n: number of leading entries to initialise; nothing is done for n <= 0.
 */
void fill8(int n)
{
  int k;

  k = 0;
  while (k < n) {
    x8[k].re = 0.000000001 * rand();
    ++k;
  }

  k = 0;
  while (k < n) {
    x8[k].im = 0.000000001 * rand();
    ++k;
  }

  k = 0;
  while (k < n) {
    y8[k].re = 0.000000001 * rand();
    ++k;
  }

  k = 0;
  while (k < n) {
    y8[k].im = 0.000000001 * rand();
    ++k;
  }

  /* Clearing the accumulator needs no particular order: one pass suffices. */
  for (k = 0;k < n;++k) {
    z8[k].re = 0;
    z8[k].im = 0;
  }
}

/*
 * Report how far x8 is from z8 over the first n entries.
 *
 * Accumulates the sum of squares of z8 and the sum of squared
 * differences between x8 and z8, then prints n and the ratio of the two
 * through the caller-supplied printf format.  A tiny constant is added
 * to the denominator so an all-zero z8 does not divide by zero.
 *
 * n:      number of leading entries to compare.
 * format: printf format consuming an int followed by a floating value.
 */
void err8(int n,char *format)
{
  real8 energy = 0;
  real8 mismatch = 0;
  int k;

  /* The two sums are independent, so a single pass keeps each one's
     accumulation order unchanged. */
  for (k = 0;k < n;++k) {
    energy += z8[k].re * z8[k].re;
    energy += z8[k].im * z8[k].im;
    mismatch += (x8[k].re - z8[k].re) * (x8[k].re - z8[k].re);
    mismatch += (x8[k].im - z8[k].im) * (x8[k].im - z8[k].im);
  }

  printf(format,n,mismatch / (energy + 0.0000000000000000000001));
}

/*
 * Run one "r8" check of size n.
 *
 * First builds a reference result in z8 with a direct double loop over
 * every (i, j) pair.  Each pair contributes
 *   re*re        to the real part of slot (i + j),
 *   re*im, im*re to the imaginary part of slot (i + j),
 *   im*im        to the real part of slot (i + j + 1),
 * with slot numbers wrapped by "& (n - 1)" (a modulo only when n is a
 * power of two, which is what main() passes).
 *
 * Then drives the supplied callbacks: fft on y8, scale y8 by 0.25/n,
 * fft on x8, multiply(x8, y8), unfft on x8, and finally prints the
 * x8-versus-z8 error via err8().
 *
 * n:        problem size.
 * fft:      callback applied to y8 and to x8 before multiplying.
 * unfft:    callback applied to x8 after multiplying.
 * multiply: callback invoked as multiply(x8, y8).
 */
void doitr8(int n,void (*fft)(),void (*unfft)(),void (*multiply)())
{
  int mask = n - 1;
  int a;
  int b;
  int here;
  int next;

  fill8(n);

  for (a = 0;a < n;++a) {
    for (b = 0;b < n;++b) {
      here = (a + b) & mask;
      next = (a + b + 1) & mask;
      z8[here].re += x8[a].re * y8[b].re;
      z8[here].im += x8[a].re * y8[b].im;
      z8[here].im += x8[a].im * y8[b].re;
      z8[next].re += x8[a].im * y8[b].im;
    }
  }

  fft(y8);
  scalec8(y8,n,0.25/n);
  fft(x8);
  multiply(x8,y8);
  unfft(x8);

  err8(n,"%d r8 %e\n");
}

/*
 * Run one "c8" check of size n.
 *
 * Builds a reference result in z8 by accumulating the complex product
 * x8[i] * y8[j] into slot (i + j) & (n - 1) for every (i, j) pair
 * (the wrap is a modulo only when n is a power of two, which is what
 * main() passes).
 *
 * Then drives the supplied callbacks: fft on y8, scale y8 by 1.0/n,
 * fft on x8, multiply(x8, y8), unfft on x8, and finally prints the
 * x8-versus-z8 error via err8().
 *
 * n:        problem size.
 * fft:      callback applied to y8 and to x8 before multiplying.
 * unfft:    callback applied to x8 after multiplying.
 * multiply: callback invoked as multiply(x8, y8).
 */
void doitc8(int n,void (*fft)(),void (*unfft)(),void (*multiply)())
{
  int i;
  int j;

  fill8(n);

  for (i = 0;i < n;++i)
    for (j = 0;j < n;++j) {
      z8[(i + j) & (n - 1)].re += x8[i].re * y8[j].re;
      z8[(i + j) & (n - 1)].im += x8[i].re * y8[j].im;
      z8[(i + j) & (n - 1)].im += x8[i].im * y8[j].re;
      z8[(i + j) & (n - 1)].re -= x8[i].im * y8[j].im;
    }

  fft(y8);
  scalec8(y8,n,1.0/n);
  fft(x8);
  multiply(x8,y8);
  unfft(x8);

  err8(n,"%d c8 %e\n");
}

main()
{
  doitr4(2,fftc4_2,fftc4_un2,multiplyr4_2);
  doitc4(2,fftc4_2,fftc4_un2,multiplyc4_2);
  doitr8(2,fftc8_2,fftc8_un2,multiplyr8_2);
  doitc8(2,fftc8_2,fftc8_un2,multiplyc8_2);
  doitr4(4,fftc4_4,fftc4_un4,multiplyr4_4);
  doitc4(4,fftc4_4,fftc4_un4,multiplyc4_4);
  doitr8(4,fftc8_4,fftc8_un4,multiplyr8_4);
  doitc8(4,fftc8_4,fftc8_un4,multiplyc8_4);
  doitr4(8,fftc4_8,fftc4_un8,multiplyr4_8);
  doitc4(8,fftc4_8,fftc4_un8,multiplyc4_8);
  doitr8(8,fftc8_8,fftc8_un8,multiplyr8_8);
  doitc8(8,fftc8_8,fftc8_un8,multiplyc8_8);
  doitr4(16,fftc4_16,fftc4_un16,multiplyr4_16);
  doitc4(16,fftc4_16,fftc4_un16,multiplyc4_16);
  doitr8(16,fftc8_16,fftc8_un16,multiplyr8_16);
  doitc8(16,fftc8_16,fftc8_un16,multiplyc8_16);
  doitr4(32,fftc4_32,fftc4_un32,multiplyr4_32);
  doitc4(32,fftc4_32,fftc4_un32,multiplyc4_32);
  doitr8(32,fftc8_32,fftc8_un32,multiplyr8_32);
  doitc8(32,fftc8_32,fftc8_un32,multiplyc8_32);
  doitr4(64,fftc4_64,fftc4_un64,multiplyr4_64);
  doitc4(64,fftc4_64,fftc4_un64,multiplyc4_64);
  doitr8(64,fftc8_64,fftc8_un64,multiplyr8_64);
  doitc8(64,fftc8_64,fftc8_un64,multiplyc8_64);
  doitr4(128,fftc4_128,fftc4_un128,multiplyr4_128);
  doitc4(128,fftc4_128,fftc4_un128,multiplyc4_128);
  doitr8(128,fftc8_128,fftc8_un128,multiplyr8_128);
  doitc8(128,fftc8_128,fftc8_un128,multiplyc8_128);
  doitr4(256,fftc4_256,fftc4_un256,multiplyr4_256);
  doitc4(256,fftc4_256,fftc4_un256,multiplyc4_256);
  doitr8(256,fftc8_256,fftc8_un256,multiplyr8_256);
  doitc8(256,fftc8_256,fftc8_un256,multiplyc8_256);
  doitr4(512,fftc4_512,fftc4_un512,multiplyr4_512);
  doitc4(512,fftc4_512,fftc4_un512,multiplyc4_512);
  doitr8(512,fftc8_512,fftc8_un512,multiplyr8_512);
  doitc8(512,fftc8_512,fftc8_un512,multiplyc8_512);
  doitr4(1024,fftc4_1024,fftc4_un1024,multiplyr4_1024);
  doitc4(1024,fftc4_1024,fftc4_un1024,multiplyc4_1024);
  doitr8(1024,fftc8_1024,fftc8_un1024,multiplyr8_1024);
  doitc8(1024,fftc8_1024,fftc8_un1024,multiplyc8_1024);

#ifdef BIG
  doitr4(2048,fftc4_2048,fftc4_un2048,multiplyr4_2048);
  doitc4(2048,fftc4_2048,fftc4_un2048,multiplyc4_2048);
  doitr8(2048,fftc8_2048,fftc8_un2048,multiplyr8_2048);
  doitc8(2048,fftc8_2048,fftc8_un2048,multiplyc8_2048);
  doitr4(4096,fftc4_4096,fftc4_un4096,multiplyr4_4096);
  doitc4(4096,fftc4_4096,fftc4_un4096,multiplyc4_4096);
  doitr8(4096,fftc8_4096,fftc8_un4096,multiplyr8_4096);
  doitc8(4096,fftc8_4096,fftc8_un4096,multiplyc8_4096);
  doitr4(8192,fftc4_8192,fftc4_un8192,multiplyr4_8192);
  doitc4(8192,fftc4_8192,fftc4_un8192,multiplyc4_8192);
  doitr8(8192,fftc8_8192,fftc8_un8192,multiplyr8_8192);
  doitc8(8192,fftc8_8192,fftc8_un8192,multiplyc8_8192);
#endif

  exit(0);
}
