#include "zmult.h"
#include "timing.h"

#include <stdio.h>
#include <stdlib.h>

/* Number of timed repetitions recorded for each routine. */
#define TIMINGS 6

/* Tick readings and basic-clock readings taken by main immediately before
   and after the complete set of measurements. */
static timing start, finish;
static timing_basic startb, finishb;

/* Timestamps for the empty measurement loop (nothing executed between reads). */
static timing tnothing[TIMINGS + 2];

/* Timestamps for the measured routines, one row per routine; main picks the
   row index used for each routine. */
static timing t[200][TIMINGS + 2];

/*
 * Record TIMINGS + 1 timestamps in x, executing `what` once between each
 * consecutive pair.  `what` may be empty.  Relies on the caller's int i.
 */
#define TIME(what,x) \
  do { \
    timing_now(&(x)[0]); \
    for (i = 1;i <= TIMINGS;++i) { what; timing_now(&(x)[i]); } \
  } while (0)

/*
 * Print `label` followed by the TIMINGS tick differences between consecutive
 * timestamps stored in x, then a newline.  Relies on the caller's int i and
 * double diff.  The label is written with fputs so that it is never
 * interpreted as a printf format string.
 */
#define PRINT(label,x) \
  do { \
    fputs(label,stdout); \
    for (i = 0;i < TIMINGS;++i) { \
      diff = timing_diff(&(x)[i + 1],&(x)[i]); \
      printf(" %9.0f",diff); \
    } \
    printf("\n"); \
  } while (0)

/*
 * Benchmark driver: runs each zmult routine TIMINGS times on a shared scratch
 * buffer, recording a timestamp around every call, and then prints the
 * per-call tick counts for each routine.
 *
 * Returns 0 on success; exits with status 111 if the scratch buffer cannot
 * be allocated.
 */
int main(void)
{
  /* Number of doubles in the scratch buffer that every routine operates on. */
  enum { SCRATCH_DOUBLES = 262144 };

  double *u;
  int i;
  double diff;

  zmult_fpmode();

  u = malloc(SCRATCH_DOUBLES * sizeof *u);
  if (!u) exit(111);

  /* Write every element twice, first with a nonzero pattern and then with
     zero, so the routines start from an all-zero buffer.  Presumably the
     first pass is there to make sure the memory has really been touched
     before timing starts; confirm before removing it. */
  for (i = 0;i < SCRATCH_DOUBLES;++i)
    u[i] = i;
  for (i = 0;i < SCRATCH_DOUBLES;++i)
    u[i] = 0;

  /* Bracket the whole run with both clocks; the ratio is reported at the end
     as nanoseconds per tick. */
  timing_basic_now(&startb);
  timing_now(&start);

  /* Baseline: cost of the timestamping itself. */
  TIME(,tnothing);

  /* All operands of every call below point into the same scratch buffer u. */
  TIME(zmult_poly_2(u,u + 3,u + 5,u + 7),t[150]);
  TIME(zmult_poly_4(u,u + 7,u + 11,u + 15),t[151]);
  TIME(zmult_poly_8(u,u + 15,u + 23,u + 31),t[152]);
  TIME(zmult_poly_16(u,u + 31,u + 47,u + 63),t[153]);
  TIME(zmult_poly_32(u,u + 63,u + 95,u + 127),t[154]);
  TIME(zmult_poly_64(u,u + 127,u + 191,u + 255),t[155]);
  TIME(zmult_poly_128(u,u + 255,u + 383,u + 511),t[156]);
  TIME(zmult_poly_256(u,u + 511,u + 767,u + 1023),t[157]);
  TIME(zmult_poly_512(u,u + 1023,u + 1535,u + 2047),t[158]);
  TIME(zmult_poly_16_plus(u,u + 16,u + 32,u + 48),t[163]);
  TIME(zmult_poly_512_plus(u,u + 512,u + 1024,u + 1536),t[168]);
  TIME(zmult_48_4_plus(u,u + 4,u + 8,u + 12),t[0]);
  TIME(zmult_48_32_plus(u,u + 32,u + 64,u + 96),t[1]);
  TIME(zmult_48_8192_plus(u,u + 8192,u + 16384,u + 24576),t[2]);

  TIME(zmult_4fft_8_0(u),t[50]);
  TIME(zmult_4fft_un8_0(u),t[51]);
  TIME(zmult_4fft_16_192(u),t[52]);
  TIME(zmult_4fft_un16_192(u),t[53]);
  TIME(zmult_32fft_32_0(u,u + 1024),t[54]);
  TIME(zmult_32fft_un32_0(u,u + 1024),t[55]);
  TIME(zmult_32fft_512(u,1,u + 1024),t[56]);
  TIME(zmult_32fft_un512(u,1,u + 1024),t[57]);
  TIME(zmult_48_8192_spread(u,u + 16896),t[58]);
  TIME(zmult_48_8192_unspread(u,u + 8192,u + 24576),t[59]);

  TIME(zmult_1(u,u + 2,u + 3,u + 4),t[100]);
  TIME(zmult_2(u,u + 4,u + 6,u + 8),t[101]);
  TIME(zmult_4(u,u + 8,u + 16,u + 24),t[102]);

  timing_basic_now(&finishb);
  timing_now(&finish);

  /* Header line: which tick source was compiled in, and the ZMULT_H string. */
  printf("Using");
#ifdef HASRDTSC
  printf(" RDTSC,");
#else
#ifdef HASGETHRTIME
  printf(" gethrtime(),");
#endif
#ifdef HASRPCC
  printf(" rpcc(),");
#endif
#endif
  printf(" %s/*.c.\n",ZMULT_H);

  PRINT("empty           ",tnothing);
  PRINT("poly_2          ",t[150]);
  PRINT("poly_4          ",t[151]);
  PRINT("poly_8          ",t[152]);
  PRINT("poly_16         ",t[153]);
  PRINT("poly_16_plus    ",t[163]);
  PRINT("poly_32         ",t[154]);
  PRINT("poly_64         ",t[155]);
  PRINT("poly_128        ",t[156]);
  PRINT("poly_256        ",t[157]);
  PRINT("poly_512        ",t[158]);
  PRINT("poly_512_plus   ",t[168]);
  PRINT("48_4_plus       ",t[0]);
  PRINT("48_32_plus      ",t[1]);
  PRINT("48_8192_plus    ",t[2]);

  PRINT("4fft_8_0        ",t[50]);
  PRINT("      un        ",t[51]);
  PRINT("4fft_16_192     ",t[52]);
  PRINT("         un     ",t[53]);
  PRINT("32fft_32_0      ",t[54]);
  PRINT("        un      ",t[55]);
  PRINT("32fft_512       ",t[56]);
  PRINT("       un       ",t[57]);
  PRINT("8192_spread     ",t[58]);
  PRINT("         un     ",t[59]);

  PRINT("1               ",t[100]);
  PRINT("2               ",t[101]);
  PRINT("4               ",t[102]);

  printf("Timings are in ticks. Nanoseconds per tick: approximately %f.\n"
    ,timing_basic_diff(&finishb,&startb) / timing_diff(&finish,&start));
  printf("Timings may be underestimates on systems without hardware tick support.\n");

  free(u);
  return 0;
}
