#include "zmult.h"

#define reg register double
#define alpha16 zmult_alpha16

/*
 * Expand u[8192] into out[16896].
 *
 * The input is handled as 512 groups of 16 entries.  Group g is copied to
 * out[32*g .. 32*g+15], and out[32*g+16 .. 32*g+31] is set to zero.
 * In addition, out[16384 + g] receives u[16*g] minus the value obtained by
 * adding and then subtracting alpha16, i.e. whatever that add/subtract
 * round trip removes from u[16*g].
 *
 * NOTE(review): alpha16 is defined outside this file; presumably it is a
 * rounding constant that makes the round trip snap to a multiple of 2^16,
 * leaving the low-order part here -- confirm against its definition.
 */
void zmult_48_8192_spread(double out[16896],double u[8192])
{
  reg low, rounded;
  int base;
  int k;

  for (base = 0;base < 8192;base += 16) {
    double *dst = out + base * 2;
    const double *src = u + base;

    for (k = 0;k < 16;++k) {
      dst[k] = src[k];
      dst[k + 16] = 0;
    }

    /* The steps below stay separate and in this order: the result is */
    /* produced purely by floating-point rounding at each step. */
    low = src[0];
    rounded = alpha16;
    rounded += low;
    rounded -= alpha16;
    low -= rounded;

    out[16384 + base / 16] = low;
  }
}

/*
 * Collapse in[16384] (512 groups of 32 entries) into out[8192].
 *
 * out[] is cleared first.  Group g starts at in[32*g] and is accumulated
 * into out[] starting at position 16*g, so consecutive groups overlap by
 * 16 entries.  Positions that would land at index 8192 or beyond wrap
 * around to index - 8192 and are subtracted instead of added.
 *
 * Each group also contributes a correction: top[g] - in[32*g], minus the
 * value obtained by adding and then subtracting alpha16.  The correction
 * is added at out[16*g] and applied at out[16*g + 32], with the same
 * wrap-and-negate rule.
 *
 * NOTE(review): alpha16 is defined outside this file; presumably the
 * add/subtract round trip rounds to a multiple of 2^16 -- confirm.
 */
void zmult_48_8192_unspread(double out[8192],double in[16384],double top[512])
{
  reg fix, rounded;
  int base;
  int direct;
  int k;

  for (k = 0;k < 8192;++k)
    out[k] = 0;

  for (base = 0;base < 8192;base += 16) {
    const double *grp = in + base * 2;

    /* number of leading entries of this group that fit below 8192 */
    direct = 8192 - base;
    if (direct > 32)
      direct = 32;

    for (k = 0;k < direct;++k)
      out[base + k] += grp[k];
    for (;k < 32;++k)
      out[base + k - 8192] -= grp[k];

    /* The steps below stay separate and in this order: the result is */
    /* produced purely by floating-point rounding at each step. */
    fix = top[base / 16];
    fix -= grp[0];
    rounded = alpha16;
    rounded += fix;
    rounded -= alpha16;
    fix -= rounded;

    out[base] += fix;
    if (base + 32 < 8192)
      out[base + 32] += fix;
    else
      out[base + 32 - 8192] -= fix;
  }
}

/* Z/(2^393216+1); inputs must already be carried */
/* Z[y]/(y^512+1,2^768-y)<-Z[y]/(y^512+1)->(Z/(2^1536+1))[y]/(y^512+1) */
/*
 * Combine u[8192] and v[8192] into out[8192], using tmp[33968] as the
 * working area.
 *
 * Layout of tmp as used below:
 *   tmp[0     .. 16895]  spread form of u (16384 entries + 512 extra)
 *   tmp[16896 .. 33791]  spread form of v (16384 entries + 512 extra)
 *   tmp[33792 .. ]       passed as the last argument of the FFT and
 *                        32-entry helpers (presumably scratch space --
 *                        confirm against their definitions)
 *
 * Sequence: spread u, transform, scale; spread v, transform, carry;
 * combine the two transformed halves 32 entries at a time; inverse
 * transform; combine the two 512-entry extras with zmult_poly_512_plus;
 * finally unspread into out.
 */
void zmult_48_8192_plus(double out[8192],double u[8192],double v[8192],double tmp[33968])
{
  double *a = tmp;               /* spread u, later the product */
  double *b = tmp + 16896;       /* spread v */
  double *scratch = tmp + 33792;
  int off;

  zmult_48_8192_spread(a,u);
  zmult_32fft_512(a,1,scratch);
  zmult_32fft_512_scale(a,scratch);

  zmult_48_8192_spread(b,v);
  zmult_32fft_512(b,1,scratch);
  zmult_32fft_512_carry(b,scratch);

  for (off = 0;off < 16384;off += 32)
    zmult_48_32_plus(a + off,a + off,b + off,scratch);

  zmult_32fft_un512(a,1,scratch);

  /* By this point the first 512 entries of b are free: they are */
  /* overwritten with the combined extras, and b + 512 is passed as */
  /* the last argument. */
  zmult_poly_512_plus(b,a + 16384,b + 16384,b + 512);

  zmult_48_8192_unspread(out,a,b);
}
