vlang
diff --git a/‎vlib/crypto/sha3/sha3_state_generic.v‎
Lines changed: 70 additions & 186 deletions b/‎vlib/crypto/sha3/sha3_state_generic.v‎
Lines changed: 70 additions & 186 deletions
@@ -30,7 +30,7 @@ mut:
 // The state is not modified by this method.
 @[direct_array_access]
 fn (s State) to_bytes() []u8 {
-	mut byte_array := []u8{len: 200, cap: 200}
+	mut byte_array := []u8{len: 200}
 	mut index := 0
 
 	for y in 0 .. 5 {
@@ -91,175 +91,79 @@ fn (mut s State) xor_bytes(byte_array []u8, rate int) {
 
 // kaccak_p_1600_24 performs 24 rounnds on a 1600 bit state.
 //
-// The loop is unrolled to get a little better performance.
+@[direct_array_access]
 fn (mut s State) kaccak_p_1600_24() {
-	s.rnd(0)
-	s.rnd(1)
-	s.rnd(2)
-	s.rnd(3)
-	s.rnd(4)
-	s.rnd(5)
-	s.rnd(6)
-	s.rnd(7)
-	s.rnd(8)
-	s.rnd(9)
-	s.rnd(10)
-	s.rnd(11)
-	s.rnd(12)
-	s.rnd(13)
-	s.rnd(14)
-	s.rnd(15)
-	s.rnd(16)
-	s.rnd(17)
-	s.rnd(18)
-	s.rnd(19)
-	s.rnd(20)
-	s.rnd(21)
-	s.rnd(22)
-	s.rnd(23)
-}
-
-// rnd is a single round of stepping functions.
-//
-// The definition of a round is the application of the stepping
-// functions theta, rho, pi, chi, and iota, in order, on the
-// state.  The round index also influences the outcome and is
-// constrained to be 0 <= round_index < 24.
-@[inline]
-fn (mut s State) rnd(round_index int) {
-	s.theta()
-	s.rho()
-	s.pi()
-	s.chi()
-	s.iota(round_index)
-}
-
-// theta is the first step mapping function.  It is defined as:
-//
-// 1. For all pairs (x, z) such that 0 <= x < 5 and 0 <= z < w, let
-//    C[x, z] = A[x, 0, z] xor A[x, 1, z] xor A[x, 2, z] xor A[x, 3, z] xor A[x, 4, z].
-// 2. For all pairs (x, z) such that 0 <= x < 5 and 0 <= z < w let
-//    D[x, z] = C[(x-1) mod 5, z] xor C[(x+1) mod 5, (z – 1) mod w].
-// 3. For all triples (x, y, z) such that 0 <= x < 5, 0 <= y < 5, and 0 <=≤ z < w, let
-//    A′[x, y, z] = A[x, y, z] xor D[x, z].
-//
-// A is the 5 x 5 x w state matrix.  w is the number of bits in the z axis, 64 in our case.
-//
-// We can represent a lane from the state matrix as a u64 value and operate
-// on all the bite in the lane with a single 64-bit operation.  And, since
-// we represent a lane as a u64 value, we can reduce the state to a 2
-// dimensional array of u64 values.
-@[direct_array_access; inline]
-fn (mut s State) theta() {
-	// calculate the 5 intermediate C values
-	mut c := [5]Lane{init: 0}
-	for x in 0 .. 5 {
-		for y in 0 .. 5 {
-			c[x] ^= s.a[x][y]
-		}
-	}
-
-	// calculate the 5 intermediate D values
-	mut d := [5]Lane{init: 0}
-	for x in 0 .. 5 {
-		d[x] = bits.rotate_left_64(c[(x + 1) % 5], 1) ^ c[(x + 4) % 5]
-	}
-
-	// add the D values back into the state
-	for x in 0 .. 5 {
+	mut b := [5][5]Lane{}
+	for round_index in 0 .. 24 {
+		// theta
+		// C[x] = A[x,0] xor A[x,1] xor A[x,2] xor A[x,3] xor A[x,4],   for x in 0…4
+		// D[x] = C[x-1] xor rot(C[x+1],1),                             for x in 0…4
+		// A[x,y] = A[x,y] xor D[x],                           for (x,y) in (0…4,0…4)
+		c0 := s.a[0][0] ^ s.a[0][1] ^ s.a[0][2] ^ s.a[0][3] ^ s.a[0][4]
+		c1 := s.a[1][0] ^ s.a[1][1] ^ s.a[1][2] ^ s.a[1][3] ^ s.a[1][4]
+		c2 := s.a[2][0] ^ s.a[2][1] ^ s.a[2][2] ^ s.a[2][3] ^ s.a[2][4]
+		c3 := s.a[3][0] ^ s.a[3][1] ^ s.a[3][2] ^ s.a[3][3] ^ s.a[3][4]
+		c4 := s.a[4][0] ^ s.a[4][1] ^ s.a[4][2] ^ s.a[4][3] ^ s.a[4][4]
+
+		d0 := c4 ^ bits.rotate_left_64(c1, 1)
+		d1 := c0 ^ bits.rotate_left_64(c2, 1)
+		d2 := c1 ^ bits.rotate_left_64(c3, 1)
+		d3 := c2 ^ bits.rotate_left_64(c4, 1)
+		d4 := c3 ^ bits.rotate_left_64(c0, 1)
+
+		// vfmt off
+		s.a[0][0] ^= d0 s.a[0][1] ^= d0 s.a[0][2] ^= d0 s.a[0][3] ^= d0 s.a[0][4] ^= d0
+		s.a[1][0] ^= d1 s.a[1][1] ^= d1 s.a[1][2] ^= d1 s.a[1][3] ^= d1 s.a[1][4] ^= d1
+		s.a[2][0] ^= d2 s.a[2][1] ^= d2 s.a[2][2] ^= d2	s.a[2][3] ^= d2	s.a[2][4] ^= d2
+		s.a[3][0] ^= d3	s.a[3][1] ^= d3	s.a[3][2] ^= d3	s.a[3][3] ^= d3	s.a[3][4] ^= d3
+		s.a[4][0] ^= d4	s.a[4][1] ^= d4	s.a[4][2] ^= d4	s.a[4][3] ^= d4	s.a[4][4] ^= d4
+		// vfmt on
+
+		// rho and pi
+		// B[y,2*x+3*y] = rot(A[x,y], r[x,y]),                 for (x,y) in (0…4,0…4)
+		b[0][0] = s.a[0][0]
+		b[0][1] = bits.rotate_left_64(s.a[3][0], 28)
+		b[0][2] = bits.rotate_left_64(s.a[1][0], 1)
+		b[0][3] = bits.rotate_left_64(s.a[4][0], 27)
+		b[0][4] = bits.rotate_left_64(s.a[2][0], 62)
+
+		b[1][0] = bits.rotate_left_64(s.a[1][1], 44)
+		b[1][1] = bits.rotate_left_64(s.a[4][1], 20)
+		b[1][2] = bits.rotate_left_64(s.a[2][1], 6)
+		b[1][3] = bits.rotate_left_64(s.a[0][1], 36)
+		b[1][4] = bits.rotate_left_64(s.a[3][1], 55)
+
+		b[2][0] = bits.rotate_left_64(s.a[2][2], 43)
+		b[2][1] = bits.rotate_left_64(s.a[0][2], 3)
+		b[2][2] = bits.rotate_left_64(s.a[3][2], 25)
+		b[2][3] = bits.rotate_left_64(s.a[1][2], 10)
+		b[2][4] = bits.rotate_left_64(s.a[4][2], 39)
+
+		b[3][0] = bits.rotate_left_64(s.a[3][3], 21)
+		b[3][1] = bits.rotate_left_64(s.a[1][3], 45)
+		b[3][2] = bits.rotate_left_64(s.a[4][3], 8)
+		b[3][3] = bits.rotate_left_64(s.a[2][3], 15)
+		b[3][4] = bits.rotate_left_64(s.a[0][3], 41)
+
+		b[4][0] = bits.rotate_left_64(s.a[4][4], 14)
+		b[4][1] = bits.rotate_left_64(s.a[2][4], 61)
+		b[4][2] = bits.rotate_left_64(s.a[0][4], 18)
+		b[4][3] = bits.rotate_left_64(s.a[3][4], 56)
+		b[4][4] = bits.rotate_left_64(s.a[1][4], 2)
+
+		// chi
+		// A[x,y] = B[x,y] xor ((not B[x+1,y]) and B[x+2,y]),  for (x,y) in (0…4,0…4)
 		for y in 0 .. 5 {
-			s.a[x][y] ^= d[x]
+			s.a[0][y] = b[0][y] ^ (~b[1][y] & b[2][y])
+			s.a[1][y] = b[1][y] ^ (~b[2][y] & b[3][y])
+			s.a[2][y] = b[2][y] ^ (~b[3][y] & b[4][y])
+			s.a[3][y] = b[3][y] ^ (~b[4][y] & b[0][y])
+			s.a[4][y] = b[4][y] ^ (~b[0][y] & b[1][y])
 		}
-	}
-}
 
-// rho_offsets are the amount of rotation to apply to a particular lane
-// given its position in the state matrix.
-const rho_offsets = [[int(0), 36, 3, 41, 18], [int(1), 44, 10, 45, 2],
-	[int(62), 6, 43, 15, 61], [int(28), 55, 25, 21, 56], [int(27), 20, 39, 8, 14]]
-
-// rho is the second step mapping function.  It is defined as:
-//
-// 1. For all z such that 0 <= z < w, let A′ [0, 0, z] = A[0, 0, z].
-// 2. Let (x, y) = (1, 0).
-// 3. For t from 0 to 23:
-//      a. for all z such that 0 <= z < w, let A′[x, y, z] = A[x, y, (z – (t + 1)(t + 2)/2) mod w];
-//      b. let (x, y) = (y, (2x + 3y) mod 5).
-//
-// A is the 5 x 5 x w state matrix.  w is the number of bits in the z axis, 64 in our case.
-//
-// Step 1 looks worthless since A' will be overwtitten by step 3a.
-//
-// Steps 2 and 3b are defining how the x and y values are initialized and updated
-// as t goes from 0 to 23.  Notice that the initial value of x, y of 0, 0 is not
-// calculated and is just zero.  The other 24 values needed are calculated,
-// making a total of 25, which is the total number of lanes in the state.  By
-// setting the offset at 0, 0 to 0, that lane does not get rotated.
-//
-// The effect of step 3a is to rotate a 64-bit lane by the amount calculated by
-// (((t + 1) * (t + 2)) / 2) % 64.  In order to save time, these rotation values,
-// called offsets, can be calculated ahead of time.
-@[direct_array_access; inline]
-fn (mut s State) rho() {
-	for x in 0 .. 5 {
-		for y in 0 .. 5 {
-			s.a[x][y] = bits.rotate_left_64(s.a[x][y], rho_offsets[x][y])
-		}
-	}
-}
-
-// pi is the third step mapping function.  It is defined as:
-//
-// 1. For all triples (x, y, z) such that 0 <= x < 5, 0 <= y < 5, and 0 <= z < w, let
-//    A′[x, y, z]= A[(x + 3y) mod 5, x, z].
-//
-// A is the 5 x 5 x w state matrix.  w is the number of bits in the z axis, 64 in our case.
-//
-// For this function, we will need to have a temporary version of the state for
-// holding the rearranged lanes.
-@[direct_array_access; inline]
-fn (mut s State) pi() {
-	mut a_prime := [5][5]Lane{}
-	for x in 0 .. 5 {
-		for y in 0 .. 5 {
-			a_prime[x][y] = s.a[(x + (3 * y)) % 5][x]
-		}
-	}
-
-	// make the temporary state be the returned state
-	for x in 0 .. 5 {
-		for y in 0 .. 5 {
-			s.a[x][y] = a_prime[x][y]
-		}
-	}
-}
-
-// chi is the fourth step mapping function.  It is defined as:
-//
-// 1. For all triples (x, y, z) such that 0 <= x < 5, 0 <= y < 5, and 0 <= z < w, let
-//    A′ [x, y, z] = A[x, y, z] xor ((A[(x+1) mod 5, y, z] xor 1) & A[(x+2) mod 5, y, z]).
-//
-// A is the 5 x 5 x w state matrix.  w is the number of bits in the z axis, 64 in our case.
-//
-// The effect of chi is to XOR each bit with a non-linear function of two other bits
-// in its row.
-//
-// For this function, we will need to have a temporary version of the state for
-// holding the changed lanes.
-@[direct_array_access; inline]
-fn (mut s State) chi() {
-	mut a_prime := [5][5]Lane{}
-	for x in 0 .. 5 {
-		for y in 0 .. 5 {
-			a_prime[x][y] = s.a[x][y] ^ (~(s.a[(x + 1) % 5][y]) & s.a[(x + 2) % 5][y])
-		}
-	}
-
-	// make the temporary state be the returned state
-	for x in 0 .. 5 {
-		for y in 0 .. 5 {
-			s.a[x][y] = a_prime[x][y]
-		}
+		// iota
+		// A[0,0] = A[0,0] xor RC
+		s.a[0][0] ^= iota_round_constants[round_index]
 	}
 }
 
@@ -272,26 +176,6 @@ const iota_round_constants = [u64(0x0000000000000001), 0x0000000000008082, 0x800
 	0x800000008000000a, 0x8000000080008081, 0x8000000000008080, 0x0000000080000001,
 	0x8000000080008008]
 
-// iota is the fifth step mapping function.  It is defined as:
-//
-// 1. For all triples (x, y, z) such that 0 <= x < 5, 0 <= y < 5, and 0 <= z < w, let
-//    A′[x, y, z] = A[x, y, z].
-// 2. Let RC = 0**w
-// 3. For j from 0 to l, let RC[2**j – 1] = rc(j + 7i_r).
-// 4. For all z such that 0 ≤ z < w, let A′ [0, 0, z] = A′ [0, 0, z] xor RC[z].
-//
-// A is the 5 x 5 x w state matrix.  w is the number of bits in the z axis, 64 in our case.
-//
-// This is pretty ugly.  Fortunately, all the uglyness can be precomputed so that
-// all we need to do is xor lane 0, 0 with the appropriate precomputed value.  These
-// precomputed values are indexed by the round which is being applied.  For sha3,
-// the number of rounds is 24 so we just need to precompute the 24 valuse needed
-// to xor with lane 0, 0.
-@[direct_array_access; inline]
-fn (mut s State) iota(round_index int) {
-	s.a[0][0] ^= iota_round_constants[round_index]
-}
-
 fn (s State) str() string {
 	mut output := '\n             y = 0            y = 1            y = 2            y = 3            y = 4\n'
 	for x in 0 .. 5 {