#include <stdio.h>
#include <stdlib.h>

#include "fftc4.h"
#include "fftc8.h"
#include "multiplyc4.h"
#include "multiplyc8.h"
#include "multiplyr4.h"
#include "multiplyr8.h"
#include "timing.h"

/*
 * Work buffers shared by every benchmark run. Both are allocated and
 * zero-filled in main(); doit8() operates on x8 and doit4() on x4.
 */
complex8 *x8;
complex4 *x4;

/* Number of timed repetitions recorded for each benchmark configuration. */
#define TIMINGS 10
/*
 * Index of the first repetition that print() reports. Repetitions
 * 0 .. TIMINGSLOW-1 are still executed and recorded, but never printed.
 */
#define TIMINGSLOW 5

/*
 * Timestamps taken by main() immediately before (start, startb) and after
 * (finish, finishb) the whole benchmark. main() divides the timing_basic
 * difference by the timing difference to report nanoseconds per tick.
 */
timing start;
timing_basic startb;
timing finish;
timing_basic finishb;

/*
 * Per-configuration timestamp table. For slot s and repetition j,
 * t[s][0][j] is taken just before the run and t[s][1][j] just after it.
 * main() fills slots 4 .. 43; the remaining slots are unused here.
 */
timing t[100][2][TIMINGS];

void doit4(register timing t[2][TIMINGS],register void (*fft)(),register void (*unfft)(),register void (*multiply)())
{
  register complex4 *x = x4;
  register int j;
  for (j = 0;j < TIMINGS;++j) {
    timing_now(&t[0][j]);
    fft(x);
    multiply(x,x);
    unfft(x);
    timing_now(&t[1][j]);
  }
}

void doit8(register timing t[2][TIMINGS],register void (*fft)(),register void (*unfft)(),register void (*multiply)())
{
  register complex8 *x = x8;
  register int j;
  for (j = 0;j < TIMINGS;++j) {
    timing_now(&t[0][j]);
    fft(x);
    multiply(x,x);
    unfft(x);
    timing_now(&t[1][j]);
  }
}

/*
 * Print one report line for a benchmark configuration.
 *
 * t    - timestamps recorded for the configuration: t[0][j] before and
 *        t[1][j] after repetition j.
 * size - number of points in the transform; printed as the row label and
 *        used as the divisor for the per-point figure.
 *
 * Output: the size, then the elapsed time of every repetition from index
 * TIMINGSLOW through TIMINGS-1, then the smallest of those elapsed times
 * divided by size. Repetitions before TIMINGSLOW are not reported.
 */
void print(timing t[2][TIMINGS],int size)
{
  double diff;
  /*
   * Start from a defined value: the loop below assigns diffmin on its first
   * pass, but if TIMINGSLOW >= TIMINGS it never runs and the final printf
   * would otherwise read an indeterminate value.
   */
  double diffmin = 0.0;
  int j;

  printf("%4d:",size);
  for (j = TIMINGSLOW;j < TIMINGS;++j) {
    diff = timing_diff(&t[1][j],&t[0][j]);
    printf(" %9.0f",diff);
    /* The first reported repetition seeds the minimum; later ones lower it. */
    if ((j == TIMINGSLOW) || (diff < diffmin)) diffmin = diff;
  }
  printf("  %10.3f/pt\n",diffmin / size);
}

main()
{
  int j;

  x8 = (complex8 *) malloc(8192 * sizeof(complex8));
  if (!x8) exit(1);
  x4 = (complex4 *) malloc(8192 * sizeof(complex4));
  if (!x4) exit(1);

  for (j = 0;j < 8192;++j) x4[j].re = x4[j].im = 0;
  for (j = 0;j < 8192;++j) x8[j].re = x8[j].im = 0;

  timing_basic_now(&startb);
  timing_now(&start);

  doit4(t[4],fftc4_2,fftc4_un4,multiplyc4_2);
  doit4(t[5],fftc4_2,fftc4_un4,multiplyr4_2);
  doit8(t[6],fftc8_2,fftc8_un4,multiplyc8_2);
  doit8(t[7],fftc8_2,fftc8_un4,multiplyr8_2);
  doit4(t[8],fftc4_4,fftc4_un4,multiplyc4_4);
  doit4(t[9],fftc4_4,fftc4_un4,multiplyr4_4);
  doit8(t[10],fftc8_4,fftc8_un4,multiplyc8_4);
  doit8(t[11],fftc8_4,fftc8_un4,multiplyr8_4);
  doit4(t[12],fftc4_8,fftc4_un8,multiplyc4_8);
  doit4(t[13],fftc4_8,fftc4_un8,multiplyr4_8);
  doit8(t[14],fftc8_8,fftc8_un8,multiplyc8_8);
  doit8(t[15],fftc8_8,fftc8_un8,multiplyr8_8);
  doit4(t[16],fftc4_16,fftc4_un16,multiplyc4_16);
  doit4(t[17],fftc4_16,fftc4_un16,multiplyr4_16);
  doit8(t[18],fftc8_16,fftc8_un16,multiplyc8_16);
  doit8(t[19],fftc8_16,fftc8_un16,multiplyr8_16);
  doit4(t[20],fftc4_32,fftc4_un32,multiplyc4_32);
  doit4(t[21],fftc4_32,fftc4_un32,multiplyr4_32);
  doit8(t[22],fftc8_32,fftc8_un32,multiplyc8_32);
  doit8(t[23],fftc8_32,fftc8_un32,multiplyr8_32);
  doit4(t[24],fftc4_64,fftc4_un64,multiplyc4_64);
  doit4(t[25],fftc4_64,fftc4_un64,multiplyr4_64);
  doit8(t[26],fftc8_64,fftc8_un64,multiplyc8_64);
  doit8(t[27],fftc8_64,fftc8_un64,multiplyr8_64);
  doit4(t[28],fftc4_128,fftc4_un128,multiplyc4_128);
  doit4(t[29],fftc4_128,fftc4_un128,multiplyr4_128);
  doit8(t[30],fftc8_128,fftc8_un128,multiplyc8_128);
  doit8(t[31],fftc8_128,fftc8_un128,multiplyr8_128);
  doit4(t[32],fftc4_256,fftc4_un256,multiplyc4_256);
  doit4(t[33],fftc4_256,fftc4_un256,multiplyr4_256);
  doit8(t[34],fftc8_256,fftc8_un256,multiplyc8_256);
  doit8(t[35],fftc8_256,fftc8_un256,multiplyr8_256);
  doit4(t[36],fftc4_512,fftc4_un512,multiplyc4_512);
  doit4(t[37],fftc4_512,fftc4_un512,multiplyr4_512);
  doit8(t[38],fftc8_512,fftc8_un512,multiplyc8_512);
  doit8(t[39],fftc8_512,fftc8_un512,multiplyr8_512);
  doit4(t[40],fftc4_1024,fftc4_un1024,multiplyc4_1024);
  doit4(t[41],fftc4_1024,fftc4_un1024,multiplyr4_1024);
  doit8(t[42],fftc8_1024,fftc8_un1024,multiplyc8_1024);
  doit8(t[43],fftc8_1024,fftc8_un1024,multiplyr8_1024);

  timing_basic_now(&finishb);
  timing_now(&finish);

  for (j = 1;j <= 10;++j) {
    print(t[j * 4],1 << j);
    print(t[j * 4 + 1],1 << j);
  }
  for (j = 1;j <= 10;++j) {
    print(t[j * 4 + 2],1 << j);
    print(t[j * 4 + 3],1 << j);
  }

  printf("Timings are in ticks. Nanoseconds per tick: approximately %f.\n"
    ,timing_basic_diff(&finishb,&startb) / timing_diff(&finish,&start));
  printf("Timings may be underestimates on systems without hardware tick support.\n");

  exit(0);
}
