#include <sys/types.h>
#include <sys/time.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include "fftc4.h"
#include "fftc8.h"

/* Work buffers handed to the routines being timed; main allocates and zeroes them. */
fftc8 *x8; /* argument passed to every call made by doit8 */
fftc4 *x4; /* argument passed to every call made by doit4 */

/* Number of timed repetitions per benchmark row; each one yields one output column. */
#define TIMINGS 10

/* gettimeofday() readings taken immediately before and after each repetition. */
struct timeval tstart[TIMINGS];
struct timeval tend[TIMINGS];

/*
 * Time one routine on the global buffer x4 and print a row of results.
 *
 * size  - printed as the row label and used as a divisor of the elapsed time
 * fft   - routine under test; it is always invoked as fft(x4)
 * calls - number of back-to-back invocations that make up one timed repetition
 *
 * Output: "size:" followed by TIMINGS columns and a newline. Each column is
 * the elapsed time of one repetition as reported by gettimeofday, converted
 * to nanoseconds and divided by size and then by calls (integer division).
 */
void doit4(int size,register void (*fft)(),register int calls)
{
  register int n;
  register int rep;
  long long ns;
  struct timeval *t0;
  struct timeval *t1;

  /* Ten untimed calls to prime the cache before measuring. */
  n = 0;
  while (n < 10) {
    fft(x4);
    ++n;
  }

  /* Record every timestamp first; nothing is printed until all runs finish. */
  for (rep = 0;rep < TIMINGS;++rep) {
    gettimeofday(&tstart[rep],(struct timezone *) 0);
    for (n = 0;n < calls;++n)
      fft(x4);
    gettimeofday(&tend[rep],(struct timezone *) 0);
  }

  printf("%4d:",size);
  for (rep = 0;rep < TIMINGS;++rep) {
    t0 = &tstart[rep];
    t1 = &tend[rep];
    ns = t1->tv_sec - t0->tv_sec;                      /* whole seconds */
    ns = ns * 1000000 + (t1->tv_usec - t0->tv_usec);   /* microseconds */
    ns = ns * 1000 / size / calls;                     /* ns per unit of size per call */
    printf(" %6ld",(long) ns);
  }
  printf("\n");
}

/*
 * Time one routine on the global buffer x8 and print a row of results.
 *
 * size  - printed as the row label and used as a divisor of the elapsed time
 * fft   - routine under test; it is always invoked as fft(x8)
 * calls - number of back-to-back invocations that make up one timed repetition
 *
 * Output: "size:" followed by TIMINGS columns and a newline. Each column is
 * the elapsed time of one repetition as reported by gettimeofday, converted
 * to nanoseconds and divided by size and then by calls (integer division).
 */
void doit8(int size,register void (*fft)(),register int calls)
{
  register int n;
  register int rep;
  long long ns;
  struct timeval *t0;
  struct timeval *t1;

  /* Ten untimed calls to prime the cache before measuring. */
  n = 0;
  while (n < 10) {
    fft(x8);
    ++n;
  }

  /* Record every timestamp first; nothing is printed until all runs finish. */
  for (rep = 0;rep < TIMINGS;++rep) {
    gettimeofday(&tstart[rep],(struct timezone *) 0);
    for (n = 0;n < calls;++n)
      fft(x8);
    gettimeofday(&tend[rep],(struct timezone *) 0);
  }

  printf("%4d:",size);
  for (rep = 0;rep < TIMINGS;++rep) {
    t0 = &tstart[rep];
    t1 = &tend[rep];
    ns = t1->tv_sec - t0->tv_sec;                      /* whole seconds */
    ns = ns * 1000000 + (t1->tv_usec - t0->tv_usec);   /* microseconds */
    ns = ns * 1000 / size / calls;                     /* ns per unit of size per call */
    printf(" %6ld",(long) ns);
  }
  printf("\n");
}

/*
 * Benchmark driver.
 *
 * Allocates and zeroes the two global work buffers (x4 and x8), then calls
 * doit4 and doit8 once for every routine/size pair listed below; each call
 * prints one row of timings. Sizes run from 2 to 1024, and for each size
 * both the plain and the "un" routine are timed.
 *
 * Exit status: 1 if either buffer cannot be allocated, 0 otherwise.
 */
int main(void)
{
  enum { MAXPOINTS = 1024 }; /* largest size passed to doit4/doit8 below */
  int j;

  /* Size each buffer by its own element type (x4 was sized with fftc8 before). */
  x8 = malloc(MAXPOINTS * sizeof *x8);
  if (!x8) exit(1);
  x4 = malloc(MAXPOINTS * sizeof *x4);
  if (!x4) exit(1);

  for (j = 0;j < MAXPOINTS;++j) x4[j].re = x4[j].im = 0;
  for (j = 0;j < MAXPOINTS;++j) x8[j].re = x8[j].im = 0;
  /* are there any modern cpus where fp timing is data-dependent? */
  /* aside from overflows, of course. */

  /* The call count is halved as the size doubles, down to a minimum of 8. */
  doit4(2,fftc4_2,1024);
  doit4(2,fftc4_un2,1024);
  doit4(4,fftc4_4,512);
  doit4(4,fftc4_un4,512);
  doit4(8,fftc4_8,256);
  doit4(8,fftc4_un8,256);
  doit4(16,fftc4_16,128);
  doit4(16,fftc4_un16,128);
  doit4(32,fftc4_32,64);
  doit4(32,fftc4_un32,64);
  doit4(64,fftc4_64,32);
  doit4(64,fftc4_un64,32);
  doit4(128,fftc4_128,16);
  doit4(128,fftc4_un128,16);
  doit4(256,fftc4_256,8);
  doit4(256,fftc4_un256,8);
  doit4(512,fftc4_512,8);
  doit4(512,fftc4_un512,8);
  doit4(1024,fftc4_1024,8);
  doit4(1024,fftc4_un1024,8);

  doit8(2,fftc8_2,1024);
  doit8(2,fftc8_un2,1024);
  doit8(4,fftc8_4,512);
  doit8(4,fftc8_un4,512);
  doit8(8,fftc8_8,256);
  doit8(8,fftc8_un8,256);
  doit8(16,fftc8_16,128);
  doit8(16,fftc8_un16,128);
  doit8(32,fftc8_32,64);
  doit8(32,fftc8_un32,64);
  doit8(64,fftc8_64,32);
  doit8(64,fftc8_un64,32);
  doit8(128,fftc8_128,16);
  doit8(128,fftc8_un128,16);
  doit8(256,fftc8_256,8);
  doit8(256,fftc8_un256,8);
  doit8(512,fftc8_512,8);
  doit8(512,fftc8_un512,8);
  doit8(1024,fftc8_1024,8);
  doit8(1024,fftc8_un1024,8);

  return 0;
}
