/* SPDX-License-Identifier: Apache-2.0 OR BSD-2-Clause */
//
// This file is dual-licensed, meaning that you can use it under your
// choice of either of the following two licenses:
//
// Copyright 2023 The OpenSSL Project Authors. All Rights Reserved.
//
// Licensed under the Apache License 2.0 (the "License"). You can obtain
// a copy in the file LICENSE in the source distribution or at
// https://www.openssl.org/source/license.html
//
// or
//
// Copyright (c) 2023, Christoph Müllner <christoph.muellner@vrull.eu>
// Copyright (c) 2023, Phoebe Chen <phoebe.chen@sifive.com>
// Copyright (c) 2023, Jerry Shih <jerry.shih@sifive.com>
// Copyright 2024 Google LLC
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// 1. Redistributions of source code must retain the above copyright
//    notice, this list of conditions and the following disclaimer.
// 2. Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimer in the
//    documentation and/or other materials provided with the distribution.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

// The generated code of this file depends on the following RISC-V extensions:
// - RV64I
// - RISC-V Vector ('V') with VLEN >= 128
// - RISC-V Vector AES block cipher extension ('Zvkned')

#include <linux/linkage.h>

.text
.option arch, +zvkned

#include "aes-macros.S"

#define KEYP		a0
#define INP		a1
#define OUTP		a2
#define LEN		a3
#define IVP		a4
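
// The aliases above name the argument registers of the C prototypes below
// (RISC-V calling convention: a0-a4 hold the first five arguments).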

.macro	__aes_crypt_zvkned	enc, keylen
	vle32.v		v16, (INP)
	aes_crypt	v16, \enc, \keylen
	vse32.v		v16, (OUTP)
	ret
.endm
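
// aes_begin (from aes-macros.S) branches to the given local labels when the
// key is AES-128 or AES-192 respectively, and falls through for AES-256.
// Each per-keylen expansion below ends in its own ret, so the three labeled
// sections never fall through into one another.  The same dispatch pattern
// is used by all the AES functions in this file.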

.macro	aes_crypt_zvkned	enc
	aes_begin	KEYP, 128f, 192f
	__aes_crypt_zvkned	\enc, 256
128:
	__aes_crypt_zvkned	\enc, 128
192:
	__aes_crypt_zvkned	\enc, 192
.endm

// void aes_encrypt_zvkned(const struct crypto_aes_ctx *key,
//			   const u8 in[16], u8 out[16]);
SYM_FUNC_START(aes_encrypt_zvkned)
	aes_crypt_zvkned	1
SYM_FUNC_END(aes_encrypt_zvkned)

// Same prototype and calling convention as the encryption function
SYM_FUNC_START(aes_decrypt_zvkned)
	aes_crypt_zvkned	0
SYM_FUNC_END(aes_decrypt_zvkned)

.macro	__aes_ecb_crypt	enc, keylen
	srli		t0, LEN, 2
	// t0 is the remaining length in 32-bit words.  It's a multiple of 4.
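	// Each iteration processes as many words as vsetvli grants: with e32
	// and LMUL=8 that is up to 8 vector registers' worth of data, so the
	// loop adapts to the hardware VLEN without unrolling.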
1:
	vsetvli		t1, t0, e32, m8, ta, ma
	sub		t0, t0, t1	// Subtract number of words processed
	slli		t1, t1, 2	// Words to bytes
	vle32.v		v16, (INP)
	aes_crypt	v16, \enc, \keylen
	vse32.v		v16, (OUTP)
	add		INP, INP, t1
	add		OUTP, OUTP, t1
	bnez		t0, 1b

	ret
.endm

.macro	aes_ecb_crypt	enc
	aes_begin	KEYP, 128f, 192f
	__aes_ecb_crypt	\enc, 256
128:
	__aes_ecb_crypt	\enc, 128
192:
	__aes_ecb_crypt	\enc, 192
.endm

// void aes_ecb_encrypt_zvkned(const struct crypto_aes_ctx *key,
//			       const u8 *in, u8 *out, size_t len);
//
// |len| must be nonzero and a multiple of 16 (AES_BLOCK_SIZE).
SYM_FUNC_START(aes_ecb_encrypt_zvkned)
	aes_ecb_crypt	1
SYM_FUNC_END(aes_ecb_encrypt_zvkned)

// Same prototype and calling convention as the encryption function
SYM_FUNC_START(aes_ecb_decrypt_zvkned)
	aes_ecb_crypt	0
SYM_FUNC_END(aes_ecb_decrypt_zvkned)

.macro	aes_cbc_encrypt	keylen
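	// CBC encryption is inherently serial: each ciphertext block feeds
	// into the next block's input, so this loop handles one block per
	// iteration.  (CBC decryption, below, has no such dependency and
	// processes several blocks at a time.)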
	vle32.v		v16, (IVP)	// Load IV
1:
	vle32.v		v17, (INP)	// Load plaintext block
	vxor.vv		v16, v16, v17	// XOR with IV or prev ciphertext block
	aes_encrypt	v16, \keylen	// Encrypt
	vse32.v		v16, (OUTP)	// Store ciphertext block
	addi		INP, INP, 16
	addi		OUTP, OUTP, 16
	addi		LEN, LEN, -16
	bnez		LEN, 1b

	vse32.v		v16, (IVP)	// Store next IV
	ret
.endm

.macro	aes_cbc_decrypt	keylen
	srli		LEN, LEN, 2	// Convert LEN from bytes to words
	vle32.v		v16, (IVP)	// Load IV
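	// Each iteration decrypts up to 4 registers' worth of ciphertext
	// (LMUL=4).  The first 4 words of v16 hold the IV, or the last
	// ciphertext block of the previous iteration; sliding the current
	// ciphertext up by one block on top of it lines every block up with
	// the previous ciphertext it gets XORed with after decryption.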
1:
	vsetvli		t0, LEN, e32, m4, ta, ma
	vle32.v		v20, (INP)	// Load ciphertext blocks
	vslideup.vi	v16, v20, 4	// Setup prev ciphertext blocks
	addi		t1, t0, -4
	vslidedown.vx	v24, v20, t1	// Save last ciphertext block
	aes_decrypt	v20, \keylen	// Decrypt the blocks
	vxor.vv		v20, v20, v16	// XOR with prev ciphertext blocks
	vse32.v		v20, (OUTP)	// Store plaintext blocks
	vmv.v.v		v16, v24	// Next "IV" is last ciphertext block
	slli		t1, t0, 2	// Words to bytes
	add		INP, INP, t1
	add		OUTP, OUTP, t1
	sub		LEN, LEN, t0
	bnez		LEN, 1b

	vsetivli	zero, 4, e32, m1, ta, ma
	vse32.v		v16, (IVP)	// Store next IV
	ret
.endm

// void aes_cbc_encrypt_zvkned(const struct crypto_aes_ctx *key,
//			       const u8 *in, u8 *out, size_t len, u8 iv[16]);
//
// |len| must be nonzero and a multiple of 16 (AES_BLOCK_SIZE).
SYM_FUNC_START(aes_cbc_encrypt_zvkned)
	aes_begin	KEYP, 128f, 192f
	aes_cbc_encrypt	256
128:
	aes_cbc_encrypt	128
192:
	aes_cbc_encrypt	192
SYM_FUNC_END(aes_cbc_encrypt_zvkned)

// Same prototype and calling convention as the encryption function
SYM_FUNC_START(aes_cbc_decrypt_zvkned)
	aes_begin	KEYP, 128f, 192f
	aes_cbc_decrypt	256
128:
	aes_cbc_decrypt	128
192:
	aes_cbc_decrypt	192
SYM_FUNC_END(aes_cbc_decrypt_zvkned)

.macro	aes_cbc_cts_encrypt	keylen

	// CBC-encrypt all blocks except the last.  But don't store the
	// second-to-last block to the output buffer yet, since it will be
	// handled specially in the ciphertext stealing step.  Exception: if the
	// message is single-block, still encrypt the last (and only) block.
	li		t0, 16
	j		2f
1:
	vse32.v		v16, (OUTP)	// Store ciphertext block
	addi		OUTP, OUTP, 16
2:
	vle32.v		v17, (INP)	// Load plaintext block
	vxor.vv		v16, v16, v17	// XOR with IV or prev ciphertext block
	aes_encrypt	v16, \keylen	// Encrypt
	addi		INP, INP, 16
	addi		LEN, LEN, -16
	bgt		LEN, t0, 1b	// Repeat if more than one block remains

	// Special case: if the message is a single block, just do CBC.
	beqz		LEN, .Lcts_encrypt_done\@

	// Encrypt the last two blocks using ciphertext stealing as follows:
	//	C[n-1] = Encrypt(Encrypt(P[n-1] ^ C[n-2]) ^ P[n])
	//	C[n] = Encrypt(P[n-1] ^ C[n-2])[0..LEN]
	//
	// C[i] denotes the i'th ciphertext block, and likewise P[i] the i'th
	// plaintext block.  Block n, the last block, may be partial; its length
	// is 1 <= LEN <= 16.  If there are only 2 blocks, C[n-2] means the IV.
	//
	// v16 already contains Encrypt(P[n-1] ^ C[n-2]).
	// INP points to P[n].  OUTP points to where C[n-1] should go.
	// To support in-place encryption, load P[n] before storing C[n].
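	// Note that P[n] is implicitly zero-padded in the formula above: the
	// XOR below only touches the first LEN bytes (tail-undisturbed), so
	// bytes LEN..15 of v16 keep their value from Encrypt(P[n-1] ^ C[n-2]),
	// exactly as if a zero-padded P[n] had been XORed in.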
	addi		t0, OUTP, 16	// Get pointer to where C[n] should go
	vsetvli		zero, LEN, e8, m1, tu, ma
	vle8.v		v17, (INP)	// Load P[n]
	vse8.v		v16, (t0)	// Store C[n]
	vxor.vv		v16, v16, v17	// v16 = Encrypt(P[n-1] ^ C[n-2]) ^ P[n]
	vsetivli	zero, 4, e32, m1, ta, ma
	aes_encrypt	v16, \keylen
.Lcts_encrypt_done\@:
	vse32.v		v16, (OUTP)	// Store C[n-1] (or C[n] in single-block case)
	ret
.endm

#define LEN32		t4 // Length of remaining full blocks in 32-bit words
#define LEN_MOD16	t5 // Length of message in bytes mod 16

.macro	aes_cbc_cts_decrypt	keylen
	andi		LEN32, LEN, ~15
	srli		LEN32, LEN32, 2
	andi		LEN_MOD16, LEN, 15

	// Save C[n-2] in v28 so that it's available later during the ciphertext
	// stealing step.  If there are fewer than three blocks, C[n-2] means
	// the IV, otherwise it means the third-to-last ciphertext block.
	vmv.v.v		v28, v16	// IV
	add		t0, LEN, -33	// t0 < 0 iff the message has at most 2 blocks
	bltz		t0, .Lcts_decrypt_loop\@
	andi		t0, t0, ~15	// Else C[n-2] is at offset round_down(LEN - 33, 16)
	add		t0, t0, INP
	vle32.v		v28, (t0)	// Load C[n-2]

	// CBC-decrypt all full blocks.  For the last full block, or the last 2
	// full blocks if the message is block-aligned, this doesn't write the
	// correct output blocks (unless the message is only a single block),
	// because it XORs the wrong values with the raw AES plaintexts.  But we
	// fix this after this loop without redoing the AES decryptions.  This
	// approach allows more of the AES decryptions to be parallelized.
.Lcts_decrypt_loop\@:
	vsetvli		t0, LEN32, e32, m4, ta, ma
	addi		t1, t0, -4
	vle32.v		v20, (INP)	// Load next set of ciphertext blocks
	vmv.v.v		v24, v16	// Get IV or last ciphertext block of prev set
	vslideup.vi	v24, v20, 4	// Setup prev ciphertext blocks
	vslidedown.vx	v16, v20, t1	// Save last ciphertext block of this set
	aes_decrypt	v20, \keylen	// Decrypt this set of blocks
	vxor.vv		v24, v24, v20	// XOR prev ciphertext blocks with decrypted blocks
	vse32.v		v24, (OUTP)	// Store this set of plaintext blocks
	sub		LEN32, LEN32, t0
	slli		t0, t0, 2	// Words to bytes
	add		INP, INP, t0
	add		OUTP, OUTP, t0
	bnez		LEN32, .Lcts_decrypt_loop\@

	vsetivli	zero, 4, e32, m4, ta, ma
	vslidedown.vx	v20, v20, t1	// Extract raw plaintext of last full block
	addi		t0, OUTP, -16	// Get pointer to last full plaintext block
	bnez		LEN_MOD16, .Lcts_decrypt_non_block_aligned\@

	// Special case: if the message is a single block, just do CBC.
	li		t1, 16
	beq		LEN, t1, .Lcts_decrypt_done\@

	// Block-aligned message.  Just fix up the last 2 blocks.  We need:
	//
	//	P[n-1] = Decrypt(C[n]) ^ C[n-2]
	//	P[n] = Decrypt(C[n-1]) ^ C[n]
	//
	// We have C[n] in v16, Decrypt(C[n]) in v20, and C[n-2] in v28.
	// Together with Decrypt(C[n-1]) ^ C[n-2] from the output buffer, this
	// is everything needed to fix the output without re-decrypting blocks.
	addi		t1, OUTP, -32	// Get pointer to where P[n-1] should go
	vxor.vv		v20, v20, v28	// Decrypt(C[n]) ^ C[n-2] == P[n-1]
	vle32.v		v24, (t1)	// Decrypt(C[n-1]) ^ C[n-2]
	vse32.v		v20, (t1)	// Store P[n-1]
	vxor.vv		v20, v24, v16	// Decrypt(C[n-1]) ^ C[n-2] ^ C[n] == P[n] ^ C[n-2]
	j		.Lcts_decrypt_finish\@

.Lcts_decrypt_non_block_aligned\@:
	// Decrypt the last two blocks using ciphertext stealing as follows:
	//
	//	P[n-1] = Decrypt(C[n] || Decrypt(C[n-1])[LEN_MOD16..16]) ^ C[n-2]
	//	P[n] = (Decrypt(C[n-1]) ^ C[n])[0..LEN_MOD16]
	//
	// We already have Decrypt(C[n-1]) in v20 and C[n-2] in v28.
	vmv.v.v		v16, v20	// v16 = Decrypt(C[n-1])
	vsetvli		zero, LEN_MOD16, e8, m1, tu, ma
	vle8.v		v20, (INP)	// v20 = C[n] || Decrypt(C[n-1])[LEN_MOD16..16]
	vxor.vv		v16, v16, v20	// v16 = Decrypt(C[n-1]) ^ C[n]
	vse8.v		v16, (OUTP)	// Store P[n]
	vsetivli	zero, 4, e32, m1, ta, ma
	aes_decrypt	v20, \keylen	// v20 = Decrypt(C[n] || Decrypt(C[n-1])[LEN_MOD16..16])
.Lcts_decrypt_finish\@:
	vxor.vv		v20, v20, v28	// XOR with C[n-2]
	vse32.v		v20, (t0)	// Store last full plaintext block
.Lcts_decrypt_done\@:
	ret
.endm

.macro	aes_cbc_cts_crypt	keylen
	vle32.v		v16, (IVP)	// Load IV
	beqz		a5, .Lcts_decrypt\@	// a5 holds the 'enc' argument
	aes_cbc_cts_encrypt \keylen
.Lcts_decrypt\@:
	aes_cbc_cts_decrypt \keylen
.endm

// void aes_cbc_cts_crypt_zvkned(const struct crypto_aes_ctx *key,
//			         const u8 *in, u8 *out, size_t len,
//				 const u8 iv[16], bool enc);
//
// Encrypts or decrypts a message with the CS3 variant of AES-CBC-CTS.
// This is the variant that unconditionally swaps the last two blocks.
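// Unlike the plain CBC functions above, the IV is const here and no next IV
// is written back to memory.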
SYM_FUNC_START(aes_cbc_cts_crypt_zvkned)
	aes_begin	KEYP, 128f, 192f
	aes_cbc_cts_crypt 256
128:
	aes_cbc_cts_crypt 128
192:
	aes_cbc_cts_crypt 192
SYM_FUNC_END(aes_cbc_cts_crypt_zvkned)