/* trivium.c */

/* 
 * Reference implementation of the TRIVIUM stream cipher
 *
 * Author: Christophe De Canni\`ere, K.U.Leuven.
 */

/* ------------------------------------------------------------------------- */

#include "ecrypt-sync.h"

/* ------------------------------------------------------------------------- */

/*
 * Word-level primitives used by the cipher core.  Two back ends provide the
 * same set of macros on the type m64:
 *
 *   - plain C on u64, selected when ECRYPT_NATIVE64 is defined, or when
 *     neither __MMX__ nor _M_IX86 is defined;
 *   - MMX intrinsics on __m64, selected otherwise.
 *
 *   SL(m, i) / SR(m, i)     shift word m left / right by i bits
 *   OR / AND / XOR          bitwise combination of two words
 *   M8V(m)                  U8V() of the word (MMX: of its low 32 bits)
 *   M64TO8_LITTLE(p, i, m)  store word m at byte pointer p, word index i
 *   EMPTY()                 _m_empty() for MMX, nothing for plain C
 *
 * U8V and U64TO8_LITTLE come from the ECRYPT headers; presumably they yield
 * the low 8 bits and perform a little-endian byte store respectively.
 */
#if defined(ECRYPT_NATIVE64) || (!defined(__MMX__) && !defined(_M_IX86))

typedef u64 m64;

#define SL(m, i) ((m) << (i))
#define SR(m, i) ((m) >> (i))

#define OR(a, b) ((a) | (b))
#define AND(a, b) ((a) & (b))
#define XOR(a, b) ((a) ^ (b))

#define M8V(m) U8V(m)
/* p and i are parenthesized so that expression arguments bind as intended */
#define M64TO8_LITTLE(p, i, m) U64TO8_LITTLE((p) + (i) * 8, m)

#define EMPTY()

#else

#include <mmintrin.h>

typedef __m64 m64;

#define SL(m, i) _m_psllqi(m, i)
#define SR(m, i) _m_psrlqi(m, i)

#define OR(a, b) _m_por(a, b)
#define AND(a, b) _m_pand(a, b)
#define XOR(a, b) _m_pxor(a, b)

#define M8V(m) U8V(_m_to_int(m))
/* direct 64-bit store; p is reinterpreted as a pointer to m64 */
#define M64TO8_LITTLE(p, i, m) (((m64*)(p))[i] = (m))

#define EMPTY() _m_empty()

#endif

/* ------------------------------------------------------------------------- */

/*
 * Bit windows into the three shift registers.  Each register is held in two
 * 64-bit words (s11/s12, s21/s22, s31/s32).  Sn(i) builds a 64-bit word by
 * OR-ing the first word shifted left with the second word shifted right;
 * the two shift counts always add up to 64.
 *
 * The shift counts are (i - first) and (last - i), so i has to stay strictly
 * between `first` and `last` for both counts to fall in 1..63.
 */
#define S_UPPER(word, first, i) SL(word, ((i) - (first)))
#define S_LOWER(word, last, i)  SR(word, ((last) - (i)))

/* register 1: words s11 / s12 */
#define S11(i) S_UPPER(s11,  64, i)
#define S12(i) S_LOWER(s12, 128, i)
#define S1(i)  OR(S11(i), S12(i))

/* register 2: words s21 / s22 */
#define S21(i) S_UPPER(s21, 157, i)
#define S22(i) S_LOWER(s22, 221, i)
#define S2(i)  OR(S21(i), S22(i))

/* register 3: words s31 / s32 */
#define S31(i) S_UPPER(s31, 241, i)
#define S32(i) S_LOWER(s32, 305, i)
#define S3(i)  OR(S31(i), S32(i))

/*
 * UPDATE(): compute the next 64-bit step from the current register words
 * s11..s32 without committing it.  Afterwards
 *
 *   s12, s22, s32  hold the XOR of two taps of registers 1, 2 and 3; the
 *                  callers XOR these three words to form the keystream word;
 *   t1, t2, t3     hold the feedback words: an AND of two taps, XOR one
 *                  further tap, XOR the new s32 / s12 / s22 respectively.
 *
 * s11, s21 and s31 are left untouched.  Every tap is read before s12, s22
 * and s32 are overwritten, so all inputs come from the pre-update state.
 * Expects m64 variables t1, t2, t3 and s11..s32 in the enclosing scope.
 */
#define UPDATE()                                                             \
do {                                                                         \
  /* nonlinear part: AND of two adjacent taps per register */                \
  t1 = AND(S3(286), S3(287));                                                \
  t2 = AND(S1( 91), S1( 92));                                                \
  t3 = AND(S2(175), S2(176));                                                \
                                                                             \
  /* mix in one tap of the register being fed */                             \
  t1 = XOR(t1, S1( 69));                                                     \
  t2 = XOR(t2, S2(171));                                                     \
  t3 = XOR(t3, S3(264));                                                     \
                                                                             \
  /* linear taps; from here on the old second words are gone */              \
  s12 = XOR(S1( 66), S1( 93));                                               \
  s22 = XOR(S2(162), S2(177));                                               \
  s32 = XOR(S3(243), S3(288));                                               \
                                                                             \
  /* complete the feedback words with the freshly computed taps */           \
  t1 = XOR(t1, s32);                                                         \
  t2 = XOR(t2, s12);                                                         \
  t3 = XOR(t3, s22);                                                         \
} while (0)

/*
 * ROTATE(): commit one step.  The first word of each register moves into
 * the second word, then the feedback words t1, t2, t3 become the new first
 * words.  Whatever the second words held before is discarded.
 */
#define ROTATE()                                                             \
do {                                                                         \
  /* old first words slide down ... */                                       \
  s12 = s11;                                                                 \
  s22 = s21;                                                                 \
  s32 = s31;                                                                 \
  /* ... and the feedback words take their place */                          \
  s11 = t1;                                                                  \
  s21 = t2;                                                                  \
  s31 = t3;                                                                  \
} while (0)

/*
 * LOAD(s): read six consecutive m64 words starting at s into the local
 * variables s11, s12, s21, s22, s31, s32, in that order.
 *
 * s is reinterpreted as a pointer to m64, so it must be suitably aligned
 * for that type.  The argument is parenthesized so that an expression such
 * as `base + offset` is evaluated before the cast is applied.
 */
#define LOAD(s)                                                              \
do {                                                                         \
  s11 = ((m64*)(s))[0]; s12 = ((m64*)(s))[1];                                \
  s21 = ((m64*)(s))[2]; s22 = ((m64*)(s))[3];                                \
  s31 = ((m64*)(s))[4]; s32 = ((m64*)(s))[5];                                \
} while (0)

/*
 * STORE(s): write the local variables s11, s12, s21, s22, s31, s32, in that
 * order, to six consecutive m64 words starting at s.
 *
 * s is reinterpreted as a pointer to m64, so it must be suitably aligned
 * for that type.  The argument is parenthesized so that an expression such
 * as `base + offset` is evaluated before the cast is applied.
 */
#define STORE(s)                                                             \
do {                                                                         \
  ((m64*)(s))[0] = s11; ((m64*)(s))[1] = s12;                                \
  ((m64*)(s))[2] = s21; ((m64*)(s))[3] = s22;                                \
  ((m64*)(s))[4] = s31; ((m64*)(s))[5] = s32;                                \
} while (0)

/* ------------------------------------------------------------------------- */

/* Global initialisation hook; this implementation has nothing to set up. */
void ECRYPT_init(void)
{
  /* intentionally empty */
}

/* ------------------------------------------------------------------------- */

/*
 * Record the key and the key/IV lengths in the context.
 *
 *   ctx      context to fill in
 *   key      key bytes; ceil(keysize / 8) of them are read
 *   keysize  key length in bits
 *   ivsize   IV length in bits
 *
 * Both lengths are stored rounded up to whole bytes.  No range check is
 * made here: the caller has to pass sizes that fit ctx->key.
 */
void ECRYPT_keysetup(
  ECRYPT_ctx* ctx, 
  const u8* key, 
  u32 keysize,
  u32 ivsize)
{
  u32 pos = 0;

  /* bits -> bytes, rounding up */
  ctx->ivlen = (ivsize + 7) / 8;
  ctx->keylen = (keysize + 7) / 8;

  while (pos < ctx->keylen)
    {
      ctx->key[pos] = key[pos];
      ++pos;
    }
}

/* ------------------------------------------------------------------------- */

/*
 * Build the initial cipher state from the key stored in ctx and the given
 * IV, run the initialisation steps and leave the result in ctx->s.
 *
 *   ctx  context prepared by ECRYPT_keysetup(); ctx->key, ctx->keylen and
 *        ctx->ivlen are read, ctx->s is overwritten
 *   iv   IV bytes; ctx->ivlen of them are read (at most 12)
 *
 * Layout of the 48-byte staging buffer (three 16-byte register images):
 *   bytes  0..11  key, zero padded
 *   bytes 16..27  IV, zero padded
 *   byte  45      constant 0x0E
 *   all other bytes zero
 *
 * The buffer is zero-initialised so that every byte handed to U8TO64_BIG
 * is defined, and both lengths are clamped to the 12-byte slots so that an
 * out-of-range length in ctx cannot write past the buffer.
 */
void ECRYPT_ivsetup(
  ECRYPT_ctx* ctx,
  const u8* iv)
{
  u32 i;

  u8 s[48] = { 0 };

  m64 s11, s12;
  m64 s21, s22;
  m64 s31, s32;

  /* never copy more than the 12 bytes reserved for key and IV */
  const u32 keylen = (ctx->keylen < 12) ? ctx->keylen : 12;
  const u32 ivlen = (ctx->ivlen < 12) ? ctx->ivlen : 12;

  for (i = 0; i < keylen; ++i)
    s[i] = ctx->key[i];

  for (i = 0; i < ivlen; ++i)
    s[i + 16] = iv[i];

  s[13 + 32] = 0x0E;

  /* pack the staging bytes into the six state words */
  for (i = 0; i < 6; ++i)
    ctx->s[i] = U8TO64_BIG(s + i * 8);

  LOAD(ctx->s);

  /*
   * 18 word-wide steps whose output is discarded (presumably the cipher's
   * blank initialisation rounds, 64 bits per step).
   */
  for (i = 0; i < 9; ++i)
    {
      m64 t1, t2, t3;

      UPDATE();
      ROTATE();

      UPDATE();
      ROTATE();
    }

  STORE(ctx->s);

  EMPTY();
}

/* ------------------------------------------------------------------------- */

/*
 * Encrypt or decrypt msglen bytes by XOR-ing them with keystream.
 *
 *   action  not used; the operation is the same in both directions
 *   ctx     cipher context; ctx->s is loaded on entry and stored on exit
 *   input   msglen bytes to process
 *   output  receives msglen bytes; may be the same buffer as input
 *   msglen  number of bytes, any u32 value
 *
 * Keystream is produced one 64-bit word at a time, serialised into a local
 * buffer and XOR-ed bytewise.  input and output are therefore only ever
 * accessed as bytes (no alignment requirement), and each input byte is read
 * before the matching output byte is written, so exact in-place operation
 * works.  Partially overlapping buffers are not supported.
 *
 * If msglen is not a multiple of 8 the state still advances by a whole
 * word; the keystream bytes of that word which were not used are dropped.
 */
void ECRYPT_process_bytes(
  int action,
  ECRYPT_ctx* ctx,
  const u8* input,
  u8* output,
  u32 msglen)
{
  m64 s11, s12;
  m64 s21, s22;
  m64 s31, s32;

  /* keystream staging area: word storage, accessed bytewise through kb */
  m64 ks[2];
  u8* const kb = (u8*)ks;

  u32 i;

  (void)action;

  LOAD(ctx->s);

  /* main loop: two keystream words (16 bytes) per iteration */
  while (msglen >= 16)
    {
      m64 t1, t2, t3;

      UPDATE();
      M64TO8_LITTLE(kb, 0, XOR(XOR(s12, s22), s32));
      ROTATE();

      UPDATE();
      M64TO8_LITTLE(kb, 1, XOR(XOR(s12, s22), s32));
      ROTATE();

      for (i = 0; i < 16; ++i)
        output[i] = input[i] ^ kb[i];

      input += 16;
      output += 16;
      msglen -= 16;
    }

  /* tail: at most one full word followed by one partial word */
  while (msglen > 0)
    {
      m64 t1, t2, t3;
      const u32 n = (msglen < 8) ? msglen : 8;

      UPDATE();
      M64TO8_LITTLE(kb, 0, XOR(XOR(s12, s22), s32));
      ROTATE();

      for (i = 0; i < n; ++i)
        output[i] = input[i] ^ kb[i];

      input += n;
      output += n;
      msglen -= n;
    }

  STORE(ctx->s);

  EMPTY();
}

/* ------------------------------------------------------------------------- */
