Skip to content

Commit 65cf633

Browse files
authored
crypto.sha3: rewrite and optimize kaccak_p_1600_24() engine, update tests (#26524)
1 parent 0f7c729 commit 65cf633

2 files changed

Lines changed: 70 additions & 3540 deletions

File tree

‎vlib/crypto/sha3/sha3_state_generic.v‎

Lines changed: 70 additions & 186 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ mut:
3030
// The state is not modified by this method.
3131
@[direct_array_access]
3232
fn (s State) to_bytes() []u8 {
33-
mut byte_array := []u8{len: 200, cap: 200}
33+
mut byte_array := []u8{len: 200}
3434
mut index := 0
3535

3636
for y in 0 .. 5 {
@@ -91,175 +91,79 @@ fn (mut s State) xor_bytes(byte_array []u8, rate int) {
9191

9292
// kaccak_p_1600_24 performs 24 rounds on a 1600-bit state.
9393
//
94-
// The loop is unrolled to get a little better performance.
94+
@[direct_array_access]
9595
fn (mut s State) kaccak_p_1600_24() {
96-
s.rnd(0)
97-
s.rnd(1)
98-
s.rnd(2)
99-
s.rnd(3)
100-
s.rnd(4)
101-
s.rnd(5)
102-
s.rnd(6)
103-
s.rnd(7)
104-
s.rnd(8)
105-
s.rnd(9)
106-
s.rnd(10)
107-
s.rnd(11)
108-
s.rnd(12)
109-
s.rnd(13)
110-
s.rnd(14)
111-
s.rnd(15)
112-
s.rnd(16)
113-
s.rnd(17)
114-
s.rnd(18)
115-
s.rnd(19)
116-
s.rnd(20)
117-
s.rnd(21)
118-
s.rnd(22)
119-
s.rnd(23)
120-
}
121-
122-
// rnd is a single round of stepping functions.
123-
//
124-
// The definition of a round is the application of the stepping
125-
// functions theta, rho, pi, chi, and iota, in order, on the
126-
// state. The round index also influences the outcome and is
127-
// constrained to be 0 <= round_index < 24.
128-
@[inline]
129-
fn (mut s State) rnd(round_index int) {
130-
s.theta()
131-
s.rho()
132-
s.pi()
133-
s.chi()
134-
s.iota(round_index)
135-
}
136-
137-
// theta is the first step mapping function. It is defined as:
138-
//
139-
// 1. For all pairs (x, z) such that 0 <= x < 5 and 0 <= z < w, let
140-
// C[x, z] = A[x, 0, z] xor A[x, 1, z] xor A[x, 2, z] xor A[x, 3, z] xor A[x, 4, z].
141-
// 2. For all pairs (x, z) such that 0 <= x < 5 and 0 <= z < w let
142-
// D[x, z] = C[(x-1) mod 5, z] xor C[(x+1) mod 5, (z – 1) mod w].
143-
// 3. For all triples (x, y, z) such that 0 <= x < 5, 0 <= y < 5, and 0 <= z < w, let
144-
// A′[x, y, z] = A[x, y, z] xor D[x, z].
145-
//
146-
// A is the 5 x 5 x w state matrix. w is the number of bits in the z axis, 64 in our case.
147-
//
148-
// We can represent a lane from the state matrix as a u64 value and operate
149-
// on all the bits in the lane with a single 64-bit operation. And, since
150-
// we represent a lane as a u64 value, we can reduce the state to a 2
151-
// dimensional array of u64 values.
152-
@[direct_array_access; inline]
153-
fn (mut s State) theta() {
154-
// calculate the 5 intermediate C values
155-
mut c := [5]Lane{init: 0}
156-
for x in 0 .. 5 {
157-
for y in 0 .. 5 {
158-
c[x] ^= s.a[x][y]
159-
}
160-
}
161-
162-
// calculate the 5 intermediate D values
163-
mut d := [5]Lane{init: 0}
164-
for x in 0 .. 5 {
165-
d[x] = bits.rotate_left_64(c[(x + 1) % 5], 1) ^ c[(x + 4) % 5]
166-
}
167-
168-
// add the D values back into the state
169-
for x in 0 .. 5 {
96+
mut b := [5][5]Lane{}
97+
for round_index in 0 .. 24 {
98+
// theta
99+
// C[x] = A[x,0] xor A[x,1] xor A[x,2] xor A[x,3] xor A[x,4], for x in 0…4
100+
// D[x] = C[x-1] xor rot(C[x+1],1), for x in 0…4
101+
// A[x,y] = A[x,y] xor D[x], for (x,y) in (0…4,0…4)
102+
c0 := s.a[0][0] ^ s.a[0][1] ^ s.a[0][2] ^ s.a[0][3] ^ s.a[0][4]
103+
c1 := s.a[1][0] ^ s.a[1][1] ^ s.a[1][2] ^ s.a[1][3] ^ s.a[1][4]
104+
c2 := s.a[2][0] ^ s.a[2][1] ^ s.a[2][2] ^ s.a[2][3] ^ s.a[2][4]
105+
c3 := s.a[3][0] ^ s.a[3][1] ^ s.a[3][2] ^ s.a[3][3] ^ s.a[3][4]
106+
c4 := s.a[4][0] ^ s.a[4][1] ^ s.a[4][2] ^ s.a[4][3] ^ s.a[4][4]
107+
108+
d0 := c4 ^ bits.rotate_left_64(c1, 1)
109+
d1 := c0 ^ bits.rotate_left_64(c2, 1)
110+
d2 := c1 ^ bits.rotate_left_64(c3, 1)
111+
d3 := c2 ^ bits.rotate_left_64(c4, 1)
112+
d4 := c3 ^ bits.rotate_left_64(c0, 1)
113+
114+
// vfmt off
115+
s.a[0][0] ^= d0 s.a[0][1] ^= d0 s.a[0][2] ^= d0 s.a[0][3] ^= d0 s.a[0][4] ^= d0
116+
s.a[1][0] ^= d1 s.a[1][1] ^= d1 s.a[1][2] ^= d1 s.a[1][3] ^= d1 s.a[1][4] ^= d1
117+
s.a[2][0] ^= d2 s.a[2][1] ^= d2 s.a[2][2] ^= d2 s.a[2][3] ^= d2 s.a[2][4] ^= d2
118+
s.a[3][0] ^= d3 s.a[3][1] ^= d3 s.a[3][2] ^= d3 s.a[3][3] ^= d3 s.a[3][4] ^= d3
119+
s.a[4][0] ^= d4 s.a[4][1] ^= d4 s.a[4][2] ^= d4 s.a[4][3] ^= d4 s.a[4][4] ^= d4
120+
// vfmt on
121+
122+
// rho and pi
123+
// B[y,2*x+3*y] = rot(A[x,y], r[x,y]), for (x,y) in (0…4,0…4)
124+
b[0][0] = s.a[0][0]
125+
b[0][1] = bits.rotate_left_64(s.a[3][0], 28)
126+
b[0][2] = bits.rotate_left_64(s.a[1][0], 1)
127+
b[0][3] = bits.rotate_left_64(s.a[4][0], 27)
128+
b[0][4] = bits.rotate_left_64(s.a[2][0], 62)
129+
130+
b[1][0] = bits.rotate_left_64(s.a[1][1], 44)
131+
b[1][1] = bits.rotate_left_64(s.a[4][1], 20)
132+
b[1][2] = bits.rotate_left_64(s.a[2][1], 6)
133+
b[1][3] = bits.rotate_left_64(s.a[0][1], 36)
134+
b[1][4] = bits.rotate_left_64(s.a[3][1], 55)
135+
136+
b[2][0] = bits.rotate_left_64(s.a[2][2], 43)
137+
b[2][1] = bits.rotate_left_64(s.a[0][2], 3)
138+
b[2][2] = bits.rotate_left_64(s.a[3][2], 25)
139+
b[2][3] = bits.rotate_left_64(s.a[1][2], 10)
140+
b[2][4] = bits.rotate_left_64(s.a[4][2], 39)
141+
142+
b[3][0] = bits.rotate_left_64(s.a[3][3], 21)
143+
b[3][1] = bits.rotate_left_64(s.a[1][3], 45)
144+
b[3][2] = bits.rotate_left_64(s.a[4][3], 8)
145+
b[3][3] = bits.rotate_left_64(s.a[2][3], 15)
146+
b[3][4] = bits.rotate_left_64(s.a[0][3], 41)
147+
148+
b[4][0] = bits.rotate_left_64(s.a[4][4], 14)
149+
b[4][1] = bits.rotate_left_64(s.a[2][4], 61)
150+
b[4][2] = bits.rotate_left_64(s.a[0][4], 18)
151+
b[4][3] = bits.rotate_left_64(s.a[3][4], 56)
152+
b[4][4] = bits.rotate_left_64(s.a[1][4], 2)
153+
154+
// chi
155+
// A[x,y] = B[x,y] xor ((not B[x+1,y]) and B[x+2,y]), for (x,y) in (0…4,0…4)
170156
for y in 0 .. 5 {
171-
s.a[x][y] ^= d[x]
157+
s.a[0][y] = b[0][y] ^ (~b[1][y] & b[2][y])
158+
s.a[1][y] = b[1][y] ^ (~b[2][y] & b[3][y])
159+
s.a[2][y] = b[2][y] ^ (~b[3][y] & b[4][y])
160+
s.a[3][y] = b[3][y] ^ (~b[4][y] & b[0][y])
161+
s.a[4][y] = b[4][y] ^ (~b[0][y] & b[1][y])
172162
}
173-
}
174-
}
175163

176-
// rho_offsets are the amount of rotation to apply to a particular lane
177-
// given its position in the state matrix.
178-
const rho_offsets = [[int(0), 36, 3, 41, 18], [int(1), 44, 10, 45, 2],
179-
[int(62), 6, 43, 15, 61], [int(28), 55, 25, 21, 56], [int(27), 20, 39, 8, 14]]
180-
181-
// rho is the second step mapping function. It is defined as:
182-
//
183-
// 1. For all z such that 0 <= z < w, let A′ [0, 0, z] = A[0, 0, z].
184-
// 2. Let (x, y) = (1, 0).
185-
// 3. For t from 0 to 23:
186-
// a. for all z such that 0 <= z < w, let A′[x, y, z] = A[x, y, (z – (t + 1)(t + 2)/2) mod w];
187-
// b. let (x, y) = (y, (2x + 3y) mod 5).
188-
//
189-
// A is the 5 x 5 x w state matrix. w is the number of bits in the z axis, 64 in our case.
190-
//
191-
// Step 1 looks worthless since A' will be overwritten by step 3a.
192-
//
193-
// Steps 2 and 3b are defining how the x and y values are initialized and updated
194-
// as t goes from 0 to 23. Notice that the initial value of x, y of 0, 0 is not
195-
// calculated and is just zero. The other 24 values needed are calculated,
196-
// making a total of 25, which is the total number of lanes in the state. By
197-
// setting the offset at 0, 0 to 0, that lane does not get rotated.
198-
//
199-
// The effect of step 3a is to rotate a 64-bit lane by the amount calculated by
200-
// (((t + 1) * (t + 2)) / 2) % 64. In order to save time, these rotation values,
201-
// called offsets, can be calculated ahead of time.
202-
@[direct_array_access; inline]
203-
fn (mut s State) rho() {
204-
for x in 0 .. 5 {
205-
for y in 0 .. 5 {
206-
s.a[x][y] = bits.rotate_left_64(s.a[x][y], rho_offsets[x][y])
207-
}
208-
}
209-
}
210-
211-
// pi is the third step mapping function. It is defined as:
212-
//
213-
// 1. For all triples (x, y, z) such that 0 <= x < 5, 0 <= y < 5, and 0 <= z < w, let
214-
// A′[x, y, z]= A[(x + 3y) mod 5, x, z].
215-
//
216-
// A is the 5 x 5 x w state matrix. w is the number of bits in the z axis, 64 in our case.
217-
//
218-
// For this function, we will need to have a temporary version of the state for
219-
// holding the rearranged lanes.
220-
@[direct_array_access; inline]
221-
fn (mut s State) pi() {
222-
mut a_prime := [5][5]Lane{}
223-
for x in 0 .. 5 {
224-
for y in 0 .. 5 {
225-
a_prime[x][y] = s.a[(x + (3 * y)) % 5][x]
226-
}
227-
}
228-
229-
// make the temporary state be the returned state
230-
for x in 0 .. 5 {
231-
for y in 0 .. 5 {
232-
s.a[x][y] = a_prime[x][y]
233-
}
234-
}
235-
}
236-
237-
// chi is the fourth step mapping function. It is defined as:
238-
//
239-
// 1. For all triples (x, y, z) such that 0 <= x < 5, 0 <= y < 5, and 0 <= z < w, let
240-
// A′ [x, y, z] = A[x, y, z] xor ((A[(x+1) mod 5, y, z] xor 1) & A[(x+2) mod 5, y, z]).
241-
//
242-
// A is the 5 x 5 x w state matrix. w is the number of bits in the z axis, 64 in our case.
243-
//
244-
// The effect of chi is to XOR each bit with a non-linear function of two other bits
245-
// in its row.
246-
//
247-
// For this function, we will need to have a temporary version of the state for
248-
// holding the changed lanes.
249-
@[direct_array_access; inline]
250-
fn (mut s State) chi() {
251-
mut a_prime := [5][5]Lane{}
252-
for x in 0 .. 5 {
253-
for y in 0 .. 5 {
254-
a_prime[x][y] = s.a[x][y] ^ (~(s.a[(x + 1) % 5][y]) & s.a[(x + 2) % 5][y])
255-
}
256-
}
257-
258-
// make the temporary state be the returned state
259-
for x in 0 .. 5 {
260-
for y in 0 .. 5 {
261-
s.a[x][y] = a_prime[x][y]
262-
}
164+
// iota
165+
// A[0,0] = A[0,0] xor RC
166+
s.a[0][0] ^= iota_round_constants[round_index]
263167
}
264168
}
265169

@@ -272,26 +176,6 @@ const iota_round_constants = [u64(0x0000000000000001), 0x0000000000008082, 0x800
272176
0x800000008000000a, 0x8000000080008081, 0x8000000000008080, 0x0000000080000001,
273177
0x8000000080008008]
274178

275-
// iota is the fifth step mapping function. It is defined as:
276-
//
277-
// 1. For all triples (x, y, z) such that 0 <= x < 5, 0 <= y < 5, and 0 <= z < w, let
278-
// A′[x, y, z] = A[x, y, z].
279-
// 2. Let RC = 0**w
280-
// 3. For j from 0 to l, let RC[2**j – 1] = rc(j + 7i_r).
281-
// 4. For all z such that 0 ≤ z < w, let A′ [0, 0, z] = A′ [0, 0, z] xor RC[z].
282-
//
283-
// A is the 5 x 5 x w state matrix. w is the number of bits in the z axis, 64 in our case.
284-
//
285-
// This is pretty ugly. Fortunately, all the ugliness can be precomputed so that
286-
// all we need to do is xor lane 0, 0 with the appropriate precomputed value. These
287-
// precomputed values are indexed by the round which is being applied. For sha3,
288-
// the number of rounds is 24 so we just need to precompute the 24 values needed
289-
// to xor with lane 0, 0.
290-
@[direct_array_access; inline]
291-
fn (mut s State) iota(round_index int) {
292-
s.a[0][0] ^= iota_round_constants[round_index]
293-
}
294-
295179
fn (s State) str() string {
296180
mut output := '\n y = 0 y = 1 y = 2 y = 3 y = 4\n'
297181
for x in 0 .. 5 {

0 commit comments

Comments
 (0)