/* snefru512.c, snefru512.h: Snefru 512-bit hashing library
Interface by Daniel J. Bernstein <djb@pobox.com>.
Requires 32-bit words.
19960329: changed snefru512_rotated back to static.
19960329: initialized snefru512_rotated directly.
19960328: eliminated OUTPUTBLOCKSIZE.
19960328: changed snefru512_word to uint32.
930622: Replaced static boxes with extern boxes.
930601: Baseline, snefru512 1.0. Subject to Xerox copyright, see below.
No known patent problems.

Design and original code by Ralph Merkle <merkle@parc.xerox.com>;
comp.sources.unix edition by Rich Salz <rsalz@osf.org>.

Documentation in snefru512.3.

Original Xerox copyright:

    Copyright (c) Xerox Corporation 1989.  All rights reserved.

    License to copy and use this software is granted provided that it
    is identified as the 'Xerox Secure Hash Function' in all material
    mentioning or referencing this software or this hash function.

    License is also granted to make and use derivative works provided
    that such works are identified as 'derived from the Xerox Secure
    Hash Function' in all material mentioning or referencing the
    derived work.

    Xerox Corporation makes no representations concerning either the
    merchantability of this software or the suitability of this
    software for any particular purpose.  It is provided "as is"
    without express or implied warranty of any kind.

    These notices must be retained in any copies of any part of this
    software.

Based on Merkle's reference implementation, version 2.0, 31 July 1989.
Snefru 2.0's 8 standard S boxes replaced with Snefru 2.5's 16.
*/

#include "snefru512.h"

/* Number of S-boxes held in each plane of the lookup table below
 * (the file header notes these are the 16 Snefru 2.5 boxes). */
#define SBOXCOUNT 16

/*
 * S-box lookup table: 4 planes x SBOXCOUNT S-boxes x 256 entries.  The
 * initializer text is pulled in verbatim from snefrusr.c.
 *
 * snefru512() picks the plane according to which byte of a block word
 * selects the entry: plane 0 for the low byte (no shift), plane 2 for the
 * byte at shift 16, plane 1 for the byte at shift 24 and plane 3 for the
 * byte at shift 8.  Within a plane it uses two consecutive S-boxes per
 * round.
 *
 * NOTE(review): judging by the name, each plane presumably holds the same
 * S-boxes with their entries pre-rotated so the block itself never has to
 * be rotated -- confirm against snefrusr.c.
 */
static uint32 snefru512_rotated[4][SBOXCOUNT][256] = {
#include "snefrusr.c"
} ;

/*
**  snefru512: run the Snefru mixing rounds over one 16-word block.
**
**  This routine is a specialized version of HashN.  It is optimized for
**  speed, and assumes that the input is always 16 words long:  it hashes
**  512 bits, hence its name.  You need not try to figure out this routine
**  unless you wish to figure out a fast implementation of Snefru.
**
**  output  Receives 8 words: output[i] = input[i] ^ B<15-i>, where B<n>
**          is word n of the block after all rounds.  It may be the same
**          array as input (every input word is read before the output
**          word with the same index is written); any other overlap is
**          not supported.
**  input   The 16 words to hash.  Never written through this pointer.
**  level   Number of rounds.  Each round consumes two consecutive S-boxes
**          and makes four passes over the block, so the table supports at
**          most SBOXCOUNT / 2 rounds; larger values are clamped to that
**          limit instead of reading past the end of the table.  With
**          level <= 0 no round is run and output[i] is simply
**          input[i] ^ input[15 - i].
*/
void snefru512(uint32 *output, uint32 *input, int level)
{
 register uint32        Z;
 register uint32        *SBox0;
 register uint32        *SBox1;
 register uint32    B00, B01, B02, B03;
 register uint32    B04, B05, B06, B07;
 register uint32    B08, B09, B10, B11;
 register uint32    B12, B13, B14, B15;
 int rounds;
 int round;
 int index;

 /* Only SBOXCOUNT S-boxes exist and every round uses two of them. */
 rounds = level;
 if (rounds > SBOXCOUNT / 2)
   rounds = SBOXCOUNT / 2;

 /* Initialize the block to be encrypted from the input.  In theory
  * block<i> should be kept in register.  Not all compilers can do
  * this, even when there are enough registers -- this will degrade
  * performance significantly. */
 B00 = input[0];
 B01 = input[1];
 B02 = input[2];
 B03 = input[3];
 B04 = input[4];
 B05 = input[5];
 B06 = input[6];
 B07 = input[7];
 B08 = input[8];
 B09 = input[9];
 B10 = input[10];
 B11 = input[11];
 B12 = input[12];
 B13 = input[13];
 B14 = input[14];
 B15 = input[15];

 /* Counting rounds (rather than comparing against 2 * level) keeps the
  * loop bound free of signed overflow for extreme level values. */
 for (round = 0; round < rounds; round++) {
   /* index selects the first of the two S-boxes used by this round. */
   index = 2 * round;

   /* Set up the two S-box pointers.  Each plane of the table is indexed
    * directly, so the pointers never leave the row they were taken from. */
   SBox0 = snefru512_rotated[0][index];
   SBox1 = snefru512_rotated[0][index + 1];

   /* In the following unrolled code, the basic 'assembly language'
    * block that is repeated is:
    *    1    temp1 = shift(block<i>, shiftConstant)
    *    2    temp2 = temp1 & 0x3FC
    *    3    temp3 = S-box<0 or 1> + temp2
    *    4    temp4 = *temp3
    *    5    block<i-1> ^= temp4
    *    6    block<i+1> ^= temp4
    * STEP 1:  Simply shift the i'th 32-bit block to bring the 8-bit
    * byte into the right position.  Note that we will also build-in a
    * left-shift by two bits at this stage, to eliminate the left shift
    * required later because we are indexing into an array of four-byte
    * table entries.
    *
    * STEP 2:  Mask off the desired eight bits.  Note that 0x3FC is
    * simply 0xFF << 2.
    *
    * STEP 3:  Use a normal integer add to compute the actual address
    * of the S-box entry.  Note that one of two pointers is used, as
    * appropriate.  Temp3 then holds the actual byte address of the
    * desired S-box entry.
    *
    * STEP 4:  Load the four-byte S-box entry.
    *
    * STEPS 5 and 6:  XOR the loaded S-box entry with both the
    * previous and the next 32-bit entries in the 'block' array.
    *
    * Typical optimizing compilers might fail to put all the block<i>
    * variables into registers. This can result in significant
    * performance degradation. Also, most compilers will use a separate
    * left-shift-by-2 after masking off the needed 8 bits, but the
    * performance degradation caused by this oversight should be modest.
    *
    * Note added by DJB 5/15/91: Yeah, but Sbox* absolutely _have_
    * to be in register variables! I've reorganized the declarations
    * appropriately.
    *
    * In the C below, steps 1-4 are the array lookup on the shifted and
    * masked word, and the S-boxes alternate in pairs (0,0,1,1,...) as
    * the pass walks B00..B15.  The statement order matters: every step
    * reads a word that the previous step has just modified.
    */

   /* Pass 1: low byte of each word, plane 0. */
   Z = SBox0[B00 & 0xFF]; B01 ^= Z; B15 ^= Z;
   Z = SBox0[B01 & 0xFF]; B02 ^= Z; B00 ^= Z;
   Z = SBox1[B02 & 0xFF]; B03 ^= Z; B01 ^= Z;
   Z = SBox1[B03 & 0xFF]; B04 ^= Z; B02 ^= Z;
   Z = SBox0[B04 & 0xFF]; B05 ^= Z; B03 ^= Z;
   Z = SBox0[B05 & 0xFF]; B06 ^= Z; B04 ^= Z;
   Z = SBox1[B06 & 0xFF]; B07 ^= Z; B05 ^= Z;
   Z = SBox1[B07 & 0xFF]; B08 ^= Z; B06 ^= Z;
   Z = SBox0[B08 & 0xFF]; B09 ^= Z; B07 ^= Z;
   Z = SBox0[B09 & 0xFF]; B10 ^= Z; B08 ^= Z;
   Z = SBox1[B10 & 0xFF]; B11 ^= Z; B09 ^= Z;
   Z = SBox1[B11 & 0xFF]; B12 ^= Z; B10 ^= Z;
   Z = SBox0[B12 & 0xFF]; B13 ^= Z; B11 ^= Z;
   Z = SBox0[B13 & 0xFF]; B14 ^= Z; B12 ^= Z;
   Z = SBox1[B14 & 0xFF]; B15 ^= Z; B13 ^= Z;
   Z = SBox1[B15 & 0xFF]; B00 ^= Z; B14 ^= Z;

   /* Pass 2: byte at shift 16, plane 2. */
   SBox0 = snefru512_rotated[2][index];
   SBox1 = snefru512_rotated[2][index + 1];

   Z = SBox0[(B00 >> 16) & 0xFF]; B01 ^= Z; B15 ^= Z;
   Z = SBox0[(B01 >> 16) & 0xFF]; B02 ^= Z; B00 ^= Z;
   Z = SBox1[(B02 >> 16) & 0xFF]; B03 ^= Z; B01 ^= Z;
   Z = SBox1[(B03 >> 16) & 0xFF]; B04 ^= Z; B02 ^= Z;
   Z = SBox0[(B04 >> 16) & 0xFF]; B05 ^= Z; B03 ^= Z;
   Z = SBox0[(B05 >> 16) & 0xFF]; B06 ^= Z; B04 ^= Z;
   Z = SBox1[(B06 >> 16) & 0xFF]; B07 ^= Z; B05 ^= Z;
   Z = SBox1[(B07 >> 16) & 0xFF]; B08 ^= Z; B06 ^= Z;
   Z = SBox0[(B08 >> 16) & 0xFF]; B09 ^= Z; B07 ^= Z;
   Z = SBox0[(B09 >> 16) & 0xFF]; B10 ^= Z; B08 ^= Z;
   Z = SBox1[(B10 >> 16) & 0xFF]; B11 ^= Z; B09 ^= Z;
   Z = SBox1[(B11 >> 16) & 0xFF]; B12 ^= Z; B10 ^= Z;
   Z = SBox0[(B12 >> 16) & 0xFF]; B13 ^= Z; B11 ^= Z;
   Z = SBox0[(B13 >> 16) & 0xFF]; B14 ^= Z; B12 ^= Z;
   Z = SBox1[(B14 >> 16) & 0xFF]; B15 ^= Z; B13 ^= Z;
   Z = SBox1[(B15 >> 16) & 0xFF]; B00 ^= Z; B14 ^= Z;

   /* Pass 3: byte at shift 24, plane 1.  No mask is applied here, which
    * relies on uint32 being exactly 32 bits wide (see the file header). */
   SBox0 = snefru512_rotated[1][index];
   SBox1 = snefru512_rotated[1][index + 1];

   Z = SBox0[B00 >> 24]; B01 ^= Z; B15 ^= Z;
   Z = SBox0[B01 >> 24]; B02 ^= Z; B00 ^= Z;
   Z = SBox1[B02 >> 24]; B03 ^= Z; B01 ^= Z;
   Z = SBox1[B03 >> 24]; B04 ^= Z; B02 ^= Z;
   Z = SBox0[B04 >> 24]; B05 ^= Z; B03 ^= Z;
   Z = SBox0[B05 >> 24]; B06 ^= Z; B04 ^= Z;
   Z = SBox1[B06 >> 24]; B07 ^= Z; B05 ^= Z;
   Z = SBox1[B07 >> 24]; B08 ^= Z; B06 ^= Z;
   Z = SBox0[B08 >> 24]; B09 ^= Z; B07 ^= Z;
   Z = SBox0[B09 >> 24]; B10 ^= Z; B08 ^= Z;
   Z = SBox1[B10 >> 24]; B11 ^= Z; B09 ^= Z;
   Z = SBox1[B11 >> 24]; B12 ^= Z; B10 ^= Z;
   Z = SBox0[B12 >> 24]; B13 ^= Z; B11 ^= Z;
   Z = SBox0[B13 >> 24]; B14 ^= Z; B12 ^= Z;
   Z = SBox1[B14 >> 24]; B15 ^= Z; B13 ^= Z;
   Z = SBox1[B15 >> 24]; B00 ^= Z; B14 ^= Z;

   /* Pass 4: byte at shift 8, plane 3. */
   SBox0 = snefru512_rotated[3][index];
   SBox1 = snefru512_rotated[3][index + 1];

   Z = SBox0[(B00 >> 8) & 0xFF]; B01 ^= Z; B15 ^= Z;
   Z = SBox0[(B01 >> 8) & 0xFF]; B02 ^= Z; B00 ^= Z;
   Z = SBox1[(B02 >> 8) & 0xFF]; B03 ^= Z; B01 ^= Z;
   Z = SBox1[(B03 >> 8) & 0xFF]; B04 ^= Z; B02 ^= Z;
   Z = SBox0[(B04 >> 8) & 0xFF]; B05 ^= Z; B03 ^= Z;
   Z = SBox0[(B05 >> 8) & 0xFF]; B06 ^= Z; B04 ^= Z;
   Z = SBox1[(B06 >> 8) & 0xFF]; B07 ^= Z; B05 ^= Z;
   Z = SBox1[(B07 >> 8) & 0xFF]; B08 ^= Z; B06 ^= Z;
   Z = SBox0[(B08 >> 8) & 0xFF]; B09 ^= Z; B07 ^= Z;
   Z = SBox0[(B09 >> 8) & 0xFF]; B10 ^= Z; B08 ^= Z;
   Z = SBox1[(B10 >> 8) & 0xFF]; B11 ^= Z; B09 ^= Z;
   Z = SBox1[(B11 >> 8) & 0xFF]; B12 ^= Z; B10 ^= Z;
   Z = SBox0[(B12 >> 8) & 0xFF]; B13 ^= Z; B11 ^= Z;
   Z = SBox0[(B13 >> 8) & 0xFF]; B14 ^= Z; B12 ^= Z;
   Z = SBox1[(B14 >> 8) & 0xFF]; B15 ^= Z; B13 ^= Z;
   Z = SBox1[(B15 >> 8) & 0xFF]; B00 ^= Z; B14 ^= Z;
 }

 /* Fold the first 8 input words with the last 8 block words, taken in
  * reverse order (B15 down to B08). */
 output[0] = input[0] ^ B15;
 output[1] = input[1] ^ B14;
 output[2] = input[2] ^ B13;
 output[3] = input[3] ^ B12;
 output[4] = input[4] ^ B11;
 output[5] = input[5] ^ B10;
 output[6] = input[6] ^ B09;
 output[7] = input[7] ^ B08;
}
