#include "zmult.h"

/* Short local name for zmult_alpha8, which is not defined in this file
   (presumably declared in zmult.h).  The code below only ever adds it to a
   value and then subtracts it again, and uses the difference between the
   original value and that result. */
#define alpha8 zmult_alpha8
/* Declaration prefix (storage class + type) for the floating-point
   temporaries used by the routines below. */
#define reg register double /* XXX: long double */

/*
 * Spread 32 input limbs into an 80-element work buffer.
 *
 * For each of the 16 limb pairs k = 0..15:
 *   out[4k]     = u[2k]
 *   out[4k + 1] = u[2k + 1]
 *   out[4k + 2] = 0
 *   out[4k + 3] = 0
 *   out[64 + k] = u[2k] - ((alpha8 + u[2k]) - alpha8)
 *
 * All 80 elements of out are written.  Only u[0..31] is read, even though
 * the parameter is declared with 48 elements.
 *
 * NOTE(review): the alpha8 add/subtract looks like the usual "add and
 * subtract a large constant" rounding trick, which would leave in
 * out[64 + k] the low part of limb 2k (low 8 bits, judging by the name
 * alpha8) -- confirm against the definition of zmult_alpha8.
 */
void zmult_48_32_spread(double out[80],double u[48])
{
  reg lo, hi, rounded;
  int k;

  for (k = 0;k < 16;++k) {
    /* Both limbs are loaded before anything is stored. */
    lo = u[2 * k];
    hi = u[2 * k + 1];

    out[4 * k] = lo;
    out[4 * k + 1] = hi;
    out[4 * k + 2] = 0;
    out[4 * k + 3] = 0;

    /* Algebraically (alpha8 + lo) - alpha8 equals lo, so the result relies
       on floating-point rounding of each step.  Keep these as separate
       assignments; do not fold them into a single expression. */
    rounded = alpha8;
    rounded += lo;
    rounded -= alpha8;
    lo -= rounded;
    out[64 + k] = lo;
  }
}

/*
 * Inverse of the spreading step: fold 16 four-limb coefficients (in[4k..4k+3])
 * plus 16 correction values (top[k]) back into 32 output limbs.
 *
 * Coefficient k starts at output limb 2k, so consecutive coefficients overlap
 * by two limbs.  The overlap is handled with three running values that are
 * carried from one iteration to the next.
 *
 * For each coefficient a correction
 *   delta = d - ((alpha8 + d) - alpha8),   d = top[k] - in[4k]
 * is added at output limb 2k and, through the carry pipeline, again at output
 * limb 2k + 4.  This implements the identity from the original comment:
 *   a + (2^192+1) (b-a) is congruent to a mod 2^192+1, b mod 2^8
 * NOTE(review): this presumes 48-bit limbs, so that an offset of four limbs is
 * the factor 2^192, and that the alpha8 step reduces d to its low 8 bits --
 * confirm against the definition of zmult_alpha8.
 *
 * Whatever is still pending after the last coefficient would land on limbs
 * 32..34.  It is subtracted from out[0..2] instead, consistent with reduction
 * modulo y^16+1.
 *
 * Only out[0..31] is written, even though the parameter is declared with
 * 48 elements.  in[0..63] and top[0..15] are read.
 */
void zmult_48_32_unspread(double out[48],double in[64],double top[16])
{
  int k;
  reg pend0, pend1, pend2;   /* contributions pending for limbs 2k, 2k+1, 2k+2 */
  reg limb3, delta, rounded;

  pend0 = 0;
  pend1 = 0;
  pend2 = 0;

  for (k = 0;k < 16;++k) {
    pend0 += in[4 * k];
    pend1 += in[4 * k + 1];
    pend2 += in[4 * k + 2];
    limb3 = in[4 * k + 3];

    /* delta = (top[k] - in[4k]) minus its alpha8-rounded value.  The result
       relies on floating-point rounding of each step, so the sequence of
       separate assignments must be kept as is. */
    delta = top[k];
    delta -= in[4 * k];
    rounded = alpha8;
    rounded += delta;
    rounded -= alpha8;
    delta -= rounded;

    pend0 += delta;
    out[2 * k] = pend0;
    out[2 * k + 1] = pend1;

    /* Shift the pipeline by two limbs for the next coefficient.  delta
       enters at the third slot, so it reaches an output limb two
       iterations later, at index 2k + 4. */
    pend0 = pend2;
    pend1 = limb3;
    pend2 = delta;
  }

  /* Wrap-around: the leftovers are subtracted from the lowest limbs. */
  out[0] -= pend0;
  out[1] -= pend1;
  out[2] -= pend2;
}

/* Z/(2^1536+1); inputs must already be carried */
/* Z[y]/(y^16+1,2^96-y) <- Z[y]/(y^16+1) -> (Z/(2^192+1))[y]/(y^16+1) */
/*
 * Multiplies u by v into out, using tmp[0..175] as workspace:
 *   tmp[  0.. 79]  spread form of u (64 transform values + 16 extra values)
 *   tmp[ 80..159]  spread form of v, same shape
 *   tmp[160..175]  scratch handed to zmult_48_4_plus
 * After the pointwise products, tmp[80..95] is reused as the output of
 * zmult_poly_16_plus and tmp[96..] as its scratch area.
 *
 * NOTE(review): the roles of the zmult_4fft_* and zmult_poly_16_plus helpers
 * are inferred from their names and argument positions only -- confirm
 * against their definitions.
 */
void zmult_48_32_plus(double out[32],double u[32],double v[32],double tmp[176])
{
  double *fu = tmp;             /* work area for u */
  double *fv = tmp + 80;        /* work area for v */
  double *scratch = tmp + 160;  /* scratch for the pointwise products */
  int k;

  /* Forward pass on u; this side goes through the _scale helper. */
  zmult_48_32_spread(fu,u);
  zmult_4fft_16_192(fu);
  zmult_4fft_16_scale(fu);

  /* Forward pass on v; this side goes through the _carry helper. */
  zmult_48_32_spread(fv,v);
  zmult_4fft_16_192(fv);
  zmult_4fft_16_carry(fv);

  /* 16 pointwise products of 4-limb values, written back in place into fu. */
  for (k = 0;k < 16;++k)
    zmult_48_4_plus(fu + 4 * k,fu + 4 * k,fv + 4 * k,scratch);

  zmult_4fft_un16_192(fu);

  /* Combine the two 16-element tails (fu + 64, fv + 64) into fv[0..15],
     with fv + 16 passed as the last (presumably scratch) argument. */
  zmult_poly_16_plus(fv,fu + 64,fv + 64,fv + 16);

  zmult_48_32_unspread(out,fu,fv);
}
