3030// The state is not modified by this method.
3131@[direct_array_access]
3232fn (s State) to_bytes () []u8 {
33- mut byte_array := []u8 {len: 200 , cap: 200 }
33+ mut byte_array := []u8 {len: 200 }
3434 mut index := 0
3535
3636 for y in 0 .. 5 {
@@ -91,175 +91,79 @@ fn (mut s State) xor_bytes(byte_array []u8, rate int) {
9191
9292// kaccak_p_1600_24 performs 24 rounds on a 1600 bit state.
9393//
94- // The loop is unrolled to get a little better performance.
94+ @[direct_array_access]
9595fn (mut s State) kaccak_p_1600_24 () {
96- s.rnd (0 )
97- s.rnd (1 )
98- s.rnd (2 )
99- s.rnd (3 )
100- s.rnd (4 )
101- s.rnd (5 )
102- s.rnd (6 )
103- s.rnd (7 )
104- s.rnd (8 )
105- s.rnd (9 )
106- s.rnd (10 )
107- s.rnd (11 )
108- s.rnd (12 )
109- s.rnd (13 )
110- s.rnd (14 )
111- s.rnd (15 )
112- s.rnd (16 )
113- s.rnd (17 )
114- s.rnd (18 )
115- s.rnd (19 )
116- s.rnd (20 )
117- s.rnd (21 )
118- s.rnd (22 )
119- s.rnd (23 )
120- }
121-
122- // rnd is a single round of stepping functions.
123- //
124- // The definition of a round is the application of the stepping
125- // functions theta, rho, pi, chi, and iota, in order, on the
126- // state. The round index also influences the outcome and is
127- // constrained to be 0 <= round_index < 24.
128- @[inline]
129- fn (mut s State) rnd (round_index int ) {
130- s.theta ()
131- s.rho ()
132- s.pi ()
133- s.chi ()
134- s.iota (round_index)
135- }
136-
137- // theta is the first step mapping function. It is defined as:
138- //
139- // 1. For all pairs (x, z) such that 0 <= x < 5 and 0 <= z < w, let
140- // C[x, z] = A[x, 0, z] xor A[x, 1, z] xor A[x, 2, z] xor A[x, 3, z] xor A[x, 4, z].
141- // 2. For all pairs (x, z) such that 0 <= x < 5 and 0 <= z < w let
142- // D[x, z] = C[(x-1) mod 5, z] xor C[(x+1) mod 5, (z – 1) mod w].
143- // 3. For all triples (x, y, z) such that 0 <= x < 5, 0 <= y < 5, and 0 <= z < w, let
144- // A′[x, y, z] = A[x, y, z] xor D[x, z].
145- //
146- // A is the 5 x 5 x w state matrix. w is the number of bits in the z axis, 64 in our case.
147- //
148- // We can represent a lane from the state matrix as a u64 value and operate
149- // on all the bits in the lane with a single 64-bit operation. And, since
150- // we represent a lane as a u64 value, we can reduce the state to a 2
151- // dimensional array of u64 values.
152- @[direct_array_access; inline]
153- fn (mut s State) theta () {
154- // calculate the 5 intermediate C values
155- mut c := [5 ]Lane{init: 0 }
156- for x in 0 .. 5 {
157- for y in 0 .. 5 {
158- c[x] ^= s.a[x][y]
159- }
160- }
161-
162- // calculate the 5 intermediate D values
163- mut d := [5 ]Lane{init: 0 }
164- for x in 0 .. 5 {
165- d[x] = bits.rotate_left_64 (c[(x + 1 ) % 5 ], 1 ) ^ c[(x + 4 ) % 5 ]
166- }
167-
168- // add the D values back into the state
169- for x in 0 .. 5 {
96+ mut b := [5 ][5 ]Lane{}
97+ for round_index in 0 .. 24 {
98+ // theta
99+ // C[x] = A[x,0] xor A[x,1] xor A[x,2] xor A[x,3] xor A[x,4], for x in 0…4
100+ // D[x] = C[x-1] xor rot(C[x+1],1), for x in 0…4
101+ // A[x,y] = A[x,y] xor D[x], for (x,y) in (0…4,0…4)
102+ c0 := s.a[0 ][0 ] ^ s.a[0 ][1 ] ^ s.a[0 ][2 ] ^ s.a[0 ][3 ] ^ s.a[0 ][4 ]
103+ c1 := s.a[1 ][0 ] ^ s.a[1 ][1 ] ^ s.a[1 ][2 ] ^ s.a[1 ][3 ] ^ s.a[1 ][4 ]
104+ c2 := s.a[2 ][0 ] ^ s.a[2 ][1 ] ^ s.a[2 ][2 ] ^ s.a[2 ][3 ] ^ s.a[2 ][4 ]
105+ c3 := s.a[3 ][0 ] ^ s.a[3 ][1 ] ^ s.a[3 ][2 ] ^ s.a[3 ][3 ] ^ s.a[3 ][4 ]
106+ c4 := s.a[4 ][0 ] ^ s.a[4 ][1 ] ^ s.a[4 ][2 ] ^ s.a[4 ][3 ] ^ s.a[4 ][4 ]
107+
108+ d0 := c4 ^ bits.rotate_left_64 (c1 , 1 )
109+ d1 := c0 ^ bits.rotate_left_64 (c2 , 1 )
110+ d2 := c1 ^ bits.rotate_left_64 (c3 , 1 )
111+ d3 := c2 ^ bits.rotate_left_64 (c4 , 1 )
112+ d4 := c3 ^ bits.rotate_left_64 (c0 , 1 )
113+
114+ // vfmt off
115+ s.a[0 ][0 ] ^= d0 s.a[0 ][1 ] ^= d0 s.a[0 ][2 ] ^= d0 s.a[0 ][3 ] ^= d0 s.a[0 ][4 ] ^= d0
116+ s.a[1 ][0 ] ^= d1 s.a[1 ][1 ] ^= d1 s.a[1 ][2 ] ^= d1 s.a[1 ][3 ] ^= d1 s.a[1 ][4 ] ^= d1
117+ s.a[2 ][0 ] ^= d2 s.a[2 ][1 ] ^= d2 s.a[2 ][2 ] ^= d2 s.a[2 ][3 ] ^= d2 s.a[2 ][4 ] ^= d2
118+ s.a[3 ][0 ] ^= d3 s.a[3 ][1 ] ^= d3 s.a[3 ][2 ] ^= d3 s.a[3 ][3 ] ^= d3 s.a[3 ][4 ] ^= d3
119+ s.a[4 ][0 ] ^= d4 s.a[4 ][1 ] ^= d4 s.a[4 ][2 ] ^= d4 s.a[4 ][3 ] ^= d4 s.a[4 ][4 ] ^= d4
120+ // vfmt on
121+
122+ // rho and pi
123+ // B[y,2*x+3*y] = rot(A[x,y], r[x,y]), for (x,y) in (0…4,0…4)
124+ b[0 ][0 ] = s.a[0 ][0 ]
125+ b[0 ][1 ] = bits.rotate_left_64 (s.a[3 ][0 ], 28 )
126+ b[0 ][2 ] = bits.rotate_left_64 (s.a[1 ][0 ], 1 )
127+ b[0 ][3 ] = bits.rotate_left_64 (s.a[4 ][0 ], 27 )
128+ b[0 ][4 ] = bits.rotate_left_64 (s.a[2 ][0 ], 62 )
129+
130+ b[1 ][0 ] = bits.rotate_left_64 (s.a[1 ][1 ], 44 )
131+ b[1 ][1 ] = bits.rotate_left_64 (s.a[4 ][1 ], 20 )
132+ b[1 ][2 ] = bits.rotate_left_64 (s.a[2 ][1 ], 6 )
133+ b[1 ][3 ] = bits.rotate_left_64 (s.a[0 ][1 ], 36 )
134+ b[1 ][4 ] = bits.rotate_left_64 (s.a[3 ][1 ], 55 )
135+
136+ b[2 ][0 ] = bits.rotate_left_64 (s.a[2 ][2 ], 43 )
137+ b[2 ][1 ] = bits.rotate_left_64 (s.a[0 ][2 ], 3 )
138+ b[2 ][2 ] = bits.rotate_left_64 (s.a[3 ][2 ], 25 )
139+ b[2 ][3 ] = bits.rotate_left_64 (s.a[1 ][2 ], 10 )
140+ b[2 ][4 ] = bits.rotate_left_64 (s.a[4 ][2 ], 39 )
141+
142+ b[3 ][0 ] = bits.rotate_left_64 (s.a[3 ][3 ], 21 )
143+ b[3 ][1 ] = bits.rotate_left_64 (s.a[1 ][3 ], 45 )
144+ b[3 ][2 ] = bits.rotate_left_64 (s.a[4 ][3 ], 8 )
145+ b[3 ][3 ] = bits.rotate_left_64 (s.a[2 ][3 ], 15 )
146+ b[3 ][4 ] = bits.rotate_left_64 (s.a[0 ][3 ], 41 )
147+
148+ b[4 ][0 ] = bits.rotate_left_64 (s.a[4 ][4 ], 14 )
149+ b[4 ][1 ] = bits.rotate_left_64 (s.a[2 ][4 ], 61 )
150+ b[4 ][2 ] = bits.rotate_left_64 (s.a[0 ][4 ], 18 )
151+ b[4 ][3 ] = bits.rotate_left_64 (s.a[3 ][4 ], 56 )
152+ b[4 ][4 ] = bits.rotate_left_64 (s.a[1 ][4 ], 2 )
153+
154+ // chi
155+ // A[x,y] = B[x,y] xor ((not B[x+1,y]) and B[x+2,y]), for (x,y) in (0…4,0…4)
170156 for y in 0 .. 5 {
171- s.a[x][y] ^= d[x]
157+ s.a[0 ][y] = b[0 ][y] ^ (~ b[1 ][y] & b[2 ][y])
158+ s.a[1 ][y] = b[1 ][y] ^ (~ b[2 ][y] & b[3 ][y])
159+ s.a[2 ][y] = b[2 ][y] ^ (~ b[3 ][y] & b[4 ][y])
160+ s.a[3 ][y] = b[3 ][y] ^ (~ b[4 ][y] & b[0 ][y])
161+ s.a[4 ][y] = b[4 ][y] ^ (~ b[0 ][y] & b[1 ][y])
172162 }
173- }
174- }
175163
176- // rho_offsets are the amount of rotation to apply to a particular lane
177- // given its position in the state matrix.
178- const rho_offsets = [[int (0 ), 36 , 3 , 41 , 18 ], [int (1 ), 44 , 10 , 45 , 2 ],
179- [int (62 ), 6 , 43 , 15 , 61 ], [int (28 ), 55 , 25 , 21 , 56 ], [int (27 ), 20 , 39 , 8 , 14 ]]
180-
181- // rho is the second step mapping function. It is defined as:
182- //
183- // 1. For all z such that 0 <= z < w, let A′ [0, 0, z] = A[0, 0, z].
184- // 2. Let (x, y) = (1, 0).
185- // 3. For t from 0 to 23:
186- // a. for all z such that 0 <= z < w, let A′[x, y, z] = A[x, y, (z – (t + 1)(t + 2)/2) mod w];
187- // b. let (x, y) = (y, (2x + 3y) mod 5).
188- //
189- // A is the 5 x 5 x w state matrix. w is the number of bits in the z axis, 64 in our case.
190- //
191- // Step 1 looks worthless since A' will be overwritten by step 3a.
192- //
193- // Steps 2 and 3b are defining how the x and y values are initialized and updated
194- // as t goes from 0 to 23. Notice that the initial value of x, y of 0, 0 is not
195- // calculated and is just zero. The other 24 values needed are calculated,
196- // making a total of 25, which is the total number of lanes in the state. By
197- // setting the offset at 0, 0 to 0, that lane does not get rotated.
198- //
199- // The effect of step 3a is to rotate a 64-bit lane by the amount calculated by
200- // (((t + 1) * (t + 2)) / 2) % 64. In order to save time, these rotation values,
201- // called offsets, can be calculated ahead of time.
202- @[direct_array_access; inline]
203- fn (mut s State) rho () {
204- for x in 0 .. 5 {
205- for y in 0 .. 5 {
206- s.a[x][y] = bits.rotate_left_64 (s.a[x][y], rho_offsets[x][y])
207- }
208- }
209- }
210-
211- // pi is the third step mapping function. It is defined as:
212- //
213- // 1. For all triples (x, y, z) such that 0 <= x < 5, 0 <= y < 5, and 0 <= z < w, let
214- // A′[x, y, z]= A[(x + 3y) mod 5, x, z].
215- //
216- // A is the 5 x 5 x w state matrix. w is the number of bits in the z axis, 64 in our case.
217- //
218- // For this function, we will need to have a temporary version of the state for
219- // holding the rearranged lanes.
220- @[direct_array_access; inline]
221- fn (mut s State) pi () {
222- mut a_prime := [5 ][5 ]Lane{}
223- for x in 0 .. 5 {
224- for y in 0 .. 5 {
225- a_prime[x][y] = s.a[(x + (3 * y)) % 5 ][x]
226- }
227- }
228-
229- // make the temporary state be the returned state
230- for x in 0 .. 5 {
231- for y in 0 .. 5 {
232- s.a[x][y] = a_prime[x][y]
233- }
234- }
235- }
236-
237- // chi is the fourth step mapping function. It is defined as:
238- //
239- // 1. For all triples (x, y, z) such that 0 <= x < 5, 0 <= y < 5, and 0 <= z < w, let
240- // A′ [x, y, z] = A[x, y, z] xor ((A[(x+1) mod 5, y, z] xor 1) & A[(x+2) mod 5, y, z]).
241- //
242- // A is the 5 x 5 x w state matrix. w is the number of bits in the z axis, 64 in our case.
243- //
244- // The effect of chi is to XOR each bit with a non-linear function of two other bits
245- // in its row.
246- //
247- // For this function, we will need to have a temporary version of the state for
248- // holding the changed lanes.
249- @[direct_array_access; inline]
250- fn (mut s State) chi () {
251- mut a_prime := [5 ][5 ]Lane{}
252- for x in 0 .. 5 {
253- for y in 0 .. 5 {
254- a_prime[x][y] = s.a[x][y] ^ (~ (s.a[(x + 1 ) % 5 ][y]) & s.a[(x + 2 ) % 5 ][y])
255- }
256- }
257-
258- // make the temporary state be the returned state
259- for x in 0 .. 5 {
260- for y in 0 .. 5 {
261- s.a[x][y] = a_prime[x][y]
262- }
164+ // iota
165+ // A[0,0] = A[0,0] xor RC
166+ s.a[0 ][0 ] ^= iota_round_constants[round_index]
263167 }
264168}
265169
@@ -272,26 +176,6 @@ const iota_round_constants = [u64(0x0000000000000001), 0x0000000000008082, 0x800
272176 0x800000008000000a , 0x8000000080008081 , 0x8000000000008080 , 0x0000000080000001 ,
273177 0x8000000080008008 ]
274178
275- // iota is the fifth step mapping function. It is defined as:
276- //
277- // 1. For all triples (x, y, z) such that 0 <= x < 5, 0 <= y < 5, and 0 <= z < w, let
278- // A′[x, y, z] = A[x, y, z].
279- // 2. Let RC = 0**w
280- // 3. For j from 0 to l, let RC[2**j – 1] = rc(j + 7i_r).
281- // 4. For all z such that 0 ≤ z < w, let A′ [0, 0, z] = A′ [0, 0, z] xor RC[z].
282- //
283- // A is the 5 x 5 x w state matrix. w is the number of bits in the z axis, 64 in our case.
284- //
285- // This is pretty ugly. Fortunately, all the ugliness can be precomputed so that
286- // all we need to do is xor lane 0, 0 with the appropriate precomputed value. These
287- // precomputed values are indexed by the round which is being applied. For sha3,
288- // the number of rounds is 24 so we just need to precompute the 24 values needed
289- // to xor with lane 0, 0.
290- @[direct_array_access; inline]
291- fn (mut s State) iota (round_index int ) {
292- s.a[0 ][0 ] ^= iota_round_constants[round_index]
293- }
294-
295179fn (s State) str () string {
296180 mut output := '\n y = 0 y = 1 y = 2 y = 3 y = 4\n '
297181 for x in 0 .. 5 {
0 commit comments