#include <sys/types.h>
#include <sys/time.h>
#include <math.h>
#include <stdio.h>
#include "fftc8.h"

#define TIMINGS 10 /* timed batches recorded for each transform size */

/* Work buffer handed to every transform; 1024 entries covers the largest size timed. */
fftc8 x[1024];

/* Wall-clock stamps taken just before (tstart) and just after (tend) each batch. */
struct timeval tstart[TIMINGS], tend[TIMINGS];

/*
 * Time one transform routine and print a single row of results.
 *
 * size  - number of points the routine handles; printed as the row label and
 *         used as a divisor when normalizing the timings. Must be positive.
 * fft   - routine under test; it is called with the global buffer x as its
 *         only argument.
 * calls - number of back-to-back calls that make up one timed batch. Must be
 *         positive.
 *
 * Output on stdout: "size:" followed by TIMINGS numbers. Each number is the
 * wall-clock time of one batch in nanoseconds, divided by size and by calls
 * (integer division, so the result is truncated).
 *
 * If size or calls is not positive, a message is written to stderr and nothing
 * is timed, because the normalization would otherwise divide by zero.
 *
 * NOTE(review): fft is declared without a prototype, so the argument is not
 * type-checked, and C23 reads "()" as "(void)". Give it the real parameter
 * type once confirmed against fftc8.h.
 */
void doit(int size,void (*fft)(),int calls)
{
  enum { PRIME_CALLS = 10 }; /* untimed warm-up calls; unrelated to TIMINGS */
  int i;
  int j;
  long long diff;

  if (size <= 0 || calls <= 0) {
    fprintf(stderr,"doit: size and calls must be positive (got %d, %d)\n",size,calls);
    return;
  }

  for (i = 0;i < PRIME_CALLS;++i)
    fft(x); /* prime the cache */

  /* Collect all timestamps first; printing is deferred so that I/O does not
     run between timed batches. */
  for (j = 0;j < TIMINGS;++j) {
    gettimeofday(tstart + j,(struct timezone *) 0);
    for (i = 0;i < calls;++i)
      fft(x);
    gettimeofday(tend + j,(struct timezone *) 0);
  }

  printf("%4d:",size);
  for (j = 0;j < TIMINGS;++j) {
    diff = tend[j].tv_sec - tstart[j].tv_sec;
    diff *= 1000000;                            /* seconds -> microseconds */
    diff += tend[j].tv_usec - tstart[j].tv_usec;
    diff *= 1000;                               /* microseconds -> nanoseconds */
    diff /= size;
    diff /= calls;
    printf(" %6lld",diff);
  }
  printf("\n");
}

/*
 * Benchmark driver: zero the shared buffer x, then time each transform size
 * from 2 through 1024 points via doit().
 *
 * Returns 0 so the process reports a well-defined exit status.
 */
int main(void)
{
  int j;

  for (j = 0;j < 1024;++j) x[j].re = x[j].im = 0;
  /* are there any modern cpus where fp timing is data-dependent? */
  /* aside from overflows, of course. */

  /* size * calls stays at 2048 up to 256 points; the two largest sizes
     use 8 calls per batch. */
  doit(2,fftc8_2,1024);
  doit(4,fftc8_4,512);
  doit(8,fftc8_8,256);
  doit(16,fftc8_16,128);
  doit(32,fftc8_32,64);
  doit(64,fftc8_64,32);
  doit(128,fftc8_128,16);
  doit(256,fftc8_256,8);
  doit(512,fftc8_512,8);
  doit(1024,fftc8_1024,8);

  return 0;
}
