Skip to content

Commit d7de92e

Browse files
committed
lowmem: Unpack z lazily in verification
Introduce mld_zvec following the lazy polyvec pattern (eager / lazy variants with #define dispatch on MLD_CONFIG_REDUCE_RAM):

- mld_zvec_init: in eager mode, unpacks the full polyvecl, performs the polyvecl-wide infinity-norm bound check, and NTTs in place. In lazy mode, it just stores a pointer to the packed signature bytes.
- mld_zvec_get_poly: in eager mode, copies a single polynomial from the precomputed vector. In lazy mode, unpacks one polynomial, performs the per-poly infinity-norm bound check, and NTTs into the caller-provided buffer.

The norm check thus moves out of mld_sign_verify_internal into the zvec init / get_poly accessors, so the verify body no longer has to sequence chknorm explicitly.

Add a fused matrix-vector helper, mld_polyvec_matrix_pointwise_montgomery_zvec, used by verify:

- The eager variant is a thin wrapper around the existing mld_polyvec_matrix_pointwise_montgomery_eager (z is already NTT'd by mld_zvec_init).
- The lazy variant streams z via mld_zvec_get_poly_lazy and generates the matrix on-the-fly column-by-column, accumulating A[*,l] * z[l] into w.

In REDUCE_RAM mode, this avoids holding the full unpacked polyvecl z in memory at once, reducing verify allocation by 2-5 KiB per parameter set.

Signed-off-by: Matthias J. Kannwischer <matthias@zerorisc.com>
1 parent 9fb9059 commit d7de92e

10 files changed

Lines changed: 336 additions & 31 deletions

File tree

integration/opentitan/reduce_alloc.patch

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,20 +12,20 @@ diff --git a/sw/device/lib/crypto/include/mldsa.h b/sw/device/lib/crypto/include
1212
- kOtcryptoMldsa44WorkBufferVerifyWords = 22464 / sizeof(uint32_t),
1313
+ kOtcryptoMldsa44WorkBufferKeypairWords = 26912 / sizeof(uint32_t),
1414
+ kOtcryptoMldsa44WorkBufferSignWords = 18208 / sizeof(uint32_t),
15-
+ kOtcryptoMldsa44WorkBufferVerifyWords = 13216 / sizeof(uint32_t),
15+
+ kOtcryptoMldsa44WorkBufferVerifyWords = 11200 / sizeof(uint32_t),
1616

1717
- kOtcryptoMldsa65WorkBufferKeypairWords = 46304 / sizeof(uint32_t),
1818
- kOtcryptoMldsa65WorkBufferSignWords = 44768 / sizeof(uint32_t),
1919
- kOtcryptoMldsa65WorkBufferVerifyWords = 30720 / sizeof(uint32_t),
2020
+ kOtcryptoMldsa65WorkBufferKeypairWords = 37152 / sizeof(uint32_t),
2121
+ kOtcryptoMldsa65WorkBufferSignWords = 23360 / sizeof(uint32_t),
22-
+ kOtcryptoMldsa65WorkBufferVerifyWords = 18400 / sizeof(uint32_t),
22+
+ kOtcryptoMldsa65WorkBufferVerifyWords = 15360 / sizeof(uint32_t),
2323

2424
- kOtcryptoMldsa87WorkBufferKeypairWords = 62688 / sizeof(uint32_t),
2525
- kOtcryptoMldsa87WorkBufferSignWords = 59104 / sizeof(uint32_t),
2626
- kOtcryptoMldsa87WorkBufferVerifyWords = 41216 / sizeof(uint32_t),
2727
+ kOtcryptoMldsa87WorkBufferKeypairWords = 49440 / sizeof(uint32_t),
2828
+ kOtcryptoMldsa87WorkBufferSignWords = 29504 / sizeof(uint32_t),
29-
+ kOtcryptoMldsa87WorkBufferVerifyWords = 24800 / sizeof(uint32_t),
29+
+ kOtcryptoMldsa87WorkBufferVerifyWords = 19712 / sizeof(uint32_t),
3030
};
3131

mldsa/mldsa_native.c

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -351,6 +351,9 @@
351351
#undef mld_polyvec_matrix_pointwise_montgomery
352352
#undef mld_polyvec_matrix_pointwise_montgomery_eager
353353
#undef mld_polyvec_matrix_pointwise_montgomery_lazy
354+
#undef mld_polyvec_matrix_pointwise_montgomery_zvec
355+
#undef mld_polyvec_matrix_pointwise_montgomery_zvec_eager
356+
#undef mld_polyvec_matrix_pointwise_montgomery_zvec_lazy
354357
#undef mld_sk_s1hat
355358
#undef mld_sk_s1hat_eager
356359
#undef mld_sk_s1hat_get_poly
@@ -378,6 +381,15 @@
378381
#undef mld_unpack_sk_t0hat
379382
#undef mld_unpack_sk_t0hat_eager
380383
#undef mld_unpack_sk_t0hat_lazy
384+
#undef mld_zvec
385+
#undef mld_zvec_eager
386+
#undef mld_zvec_get_poly
387+
#undef mld_zvec_get_poly_eager
388+
#undef mld_zvec_get_poly_lazy
389+
#undef mld_zvec_init
390+
#undef mld_zvec_init_eager
391+
#undef mld_zvec_init_lazy
392+
#undef mld_zvec_lazy
381393
/* mldsa/src/rounding.h */
382394
#undef MLD_2_POW_D
383395
#undef MLD_ROUNDING_H

mldsa/mldsa_native.h

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -916,33 +916,33 @@ int MLD_API_NAMESPACE(pk_from_sk)(
916916
#define MLD_TOTAL_ALLOC_44_KEYPAIR_PCT 52544
917917
#define MLD_TOTAL_ALLOC_44_PK_FROM_SK 45248
918918
#define MLD_TOTAL_ALLOC_44_SIGN 48800
919-
#define MLD_TOTAL_ALLOC_44_VERIFY 38816
919+
#define MLD_TOTAL_ALLOC_44_VERIFY 39840
920920
#define MLD_TOTAL_ALLOC_65_KEYPAIR_NO_PCT 65792
921921
#define MLD_TOTAL_ALLOC_65_KEYPAIR_PCT 79712
922922
#define MLD_TOTAL_ALLOC_65_PK_FROM_SK 71872
923923
#define MLD_TOTAL_ALLOC_65_SIGN 74432
924-
#define MLD_TOTAL_ALLOC_65_VERIFY 62432
924+
#define MLD_TOTAL_ALLOC_65_VERIFY 63456
925925
#define MLD_TOTAL_ALLOC_87_KEYPAIR_NO_PCT 104704
926926
#define MLD_TOTAL_ALLOC_87_KEYPAIR_PCT 122624
927927
#define MLD_TOTAL_ALLOC_87_PK_FROM_SK 112832
928928
#define MLD_TOTAL_ALLOC_87_SIGN 115392
929-
#define MLD_TOTAL_ALLOC_87_VERIFY 99552
929+
#define MLD_TOTAL_ALLOC_87_VERIFY 100576
930930
#else /* MLD_API_LEGACY_CONFIG || !MLD_CONFIG_REDUCE_RAM */
931931
#define MLD_TOTAL_ALLOC_44_KEYPAIR_NO_PCT 26912
932932
#define MLD_TOTAL_ALLOC_44_KEYPAIR_PCT 26912
933933
#define MLD_TOTAL_ALLOC_44_PK_FROM_SK 30944
934934
#define MLD_TOTAL_ALLOC_44_SIGN 18208
935-
#define MLD_TOTAL_ALLOC_44_VERIFY 13216
935+
#define MLD_TOTAL_ALLOC_44_VERIFY 11200
936936
#define MLD_TOTAL_ALLOC_65_KEYPAIR_NO_PCT 37152
937937
#define MLD_TOTAL_ALLOC_65_KEYPAIR_PCT 37152
938938
#define MLD_TOTAL_ALLOC_65_PK_FROM_SK 43232
939939
#define MLD_TOTAL_ALLOC_65_SIGN 23360
940-
#define MLD_TOTAL_ALLOC_65_VERIFY 18400
940+
#define MLD_TOTAL_ALLOC_65_VERIFY 15360
941941
#define MLD_TOTAL_ALLOC_87_KEYPAIR_NO_PCT 49440
942942
#define MLD_TOTAL_ALLOC_87_KEYPAIR_PCT 49440
943943
#define MLD_TOTAL_ALLOC_87_PK_FROM_SK 57568
944944
#define MLD_TOTAL_ALLOC_87_SIGN 29504
945-
#define MLD_TOTAL_ALLOC_87_VERIFY 24800
945+
#define MLD_TOTAL_ALLOC_87_VERIFY 19712
946946
#endif /* !(MLD_API_LEGACY_CONFIG || !MLD_CONFIG_REDUCE_RAM) */
947947
/* check-magic: on */
948948

mldsa/mldsa_native_asm.S

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -355,6 +355,9 @@
355355
#undef mld_polyvec_matrix_pointwise_montgomery
356356
#undef mld_polyvec_matrix_pointwise_montgomery_eager
357357
#undef mld_polyvec_matrix_pointwise_montgomery_lazy
358+
#undef mld_polyvec_matrix_pointwise_montgomery_zvec
359+
#undef mld_polyvec_matrix_pointwise_montgomery_zvec_eager
360+
#undef mld_polyvec_matrix_pointwise_montgomery_zvec_lazy
358361
#undef mld_sk_s1hat
359362
#undef mld_sk_s1hat_eager
360363
#undef mld_sk_s1hat_get_poly
@@ -382,6 +385,15 @@
382385
#undef mld_unpack_sk_t0hat
383386
#undef mld_unpack_sk_t0hat_eager
384387
#undef mld_unpack_sk_t0hat_lazy
388+
#undef mld_zvec
389+
#undef mld_zvec_eager
390+
#undef mld_zvec_get_poly
391+
#undef mld_zvec_get_poly_eager
392+
#undef mld_zvec_get_poly_lazy
393+
#undef mld_zvec_init
394+
#undef mld_zvec_init_eager
395+
#undef mld_zvec_init_lazy
396+
#undef mld_zvec_lazy
385397
/* mldsa/src/rounding.h */
386398
#undef MLD_2_POW_D
387399
#undef MLD_ROUNDING_H

mldsa/src/polyvec_lazy.c

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -200,6 +200,19 @@ void mld_polyvec_matrix_pointwise_montgomery_eager(mld_polyveck *t,
200200
mld_assert_abs_bound_2d(t->vec, MLDSA_K, MLDSA_N, MLDSA_Q);
201201
}
202202

203+
/* Eager variant of the fused matrix-vector product on a zvec.
 *
 * Thin wrapper: forwards to mld_polyvec_matrix_pointwise_montgomery_eager
 * with the vector stored in z->vec.
 *
 * w:       output vector, handed to the wrapped call as its destination
 * mat:     eager matrix, passed through unchanged
 * z:       eager zvec; only z->vec is read here
 * scratch: unused in this variant (presumably kept so that the eager and
 *          lazy variants share one signature -- confirm against the header)
 *
 * Returns 0 unconditionally. */
MLD_INTERNAL_API
204+
int mld_polyvec_matrix_pointwise_montgomery_zvec_eager(mld_polyveck *w,
205+
mld_polymat_eager *mat,
206+
mld_zvec_eager *z,
207+
mld_poly *scratch)
208+
{
209+
/* The infinity-norm bound check on z and the NTT of z have already
210+
 * been performed in mld_zvec_init_eager. */
211+
/* Explicitly discard the parameter this variant does not use. */
(void)scratch;
212+
mld_polyvec_matrix_pointwise_montgomery_eager(w, mat, &z->vec);
213+
return 0;
214+
}
215+
203216
#endif /* !MLD_CONFIG_REDUCE_RAM || MLD_UNIT_TEST */
204217

205218
#if defined(MLD_CONFIG_REDUCE_RAM) || defined(MLD_UNIT_TEST)
@@ -232,6 +245,40 @@ void mld_polyvec_matrix_pointwise_montgomery_lazy(mld_polyveck *t,
232245
}
233246
}
234247

248+
/* Lazy variant of the fused matrix-vector product on a zvec.
 *
 * Walks the columns l = 0 .. MLDSA_L-1. For each column it obtains z[l] in
 * scratch via mld_zvec_get_poly_lazy, then for every row k multiplies the
 * matrix entry returned by mld_polymat_get_poly_lazy(mat, k, l) with scratch
 * and accumulates the product into w->vec[k]. After the last column, w is
 * passed through mld_polyveck_reduce.
 *
 * w:       output vector; every w->vec[k] is written during column 0
 * mat:     lazy matrix; mat->tmp is used as a temporary for the products
 * z:       lazy zvec handed to mld_zvec_get_poly_lazy
 * scratch: caller-provided buffer that holds the current z[l]
 *
 * Returns 0 on success, or MLD_ERR_FAIL as soon as mld_zvec_get_poly_lazy
 * returns non-zero; in that case w may hold partial results from earlier
 * columns and has not been reduced. */
MLD_INTERNAL_API
249+
int mld_polyvec_matrix_pointwise_montgomery_zvec_lazy(mld_polyveck *w,
250+
mld_polymat_lazy *mat,
251+
mld_zvec_lazy *z,
252+
mld_poly *scratch)
253+
{
254+
unsigned int k, l;
255+
256+
for (l = 0; l < MLDSA_L; l++)
257+
{
258+
/* mld_zvec_get_poly_lazy unpacks z[l], performs the per-poly
259+
 * infinity-norm bound check, and NTTs scratch in place. */
260+
if (mld_zvec_get_poly_lazy(scratch, z, l))
261+
{
262+
return MLD_ERR_FAIL;
263+
}
264+
for (k = 0; k < MLDSA_K; k++)
265+
{
266+
/* NOTE(review): a_kl comes from the same mat whose tmp field is
 * overwritten below; confirm that a_kl never aliases mat->tmp, or that
 * mld_poly_pointwise_montgomery tolerates its output aliasing an input. */
const mld_poly *a_kl = mld_polymat_get_poly_lazy(mat, k, l);
267+
/* The first column writes w[k] directly, so w needs no prior zeroing. */
if (l == 0)
268+
{
269+
mld_poly_pointwise_montgomery(&w->vec[k], a_kl, scratch);
270+
}
271+
else
272+
{
273+
/* Later columns: form the product in mat->tmp, then add it onto w[k]. */
mld_poly_pointwise_montgomery(&mat->tmp, a_kl, scratch);
274+
mld_poly_add(&w->vec[k], &mat->tmp);
275+
}
276+
}
277+
}
278+
/* Single reduction of the accumulated sums after all columns are added. */
mld_polyveck_reduce(w);
279+
return 0;
280+
}
281+
235282
#endif /* MLD_CONFIG_REDUCE_RAM || MLD_UNIT_TEST */
236283

237284
/* To facilitate single-compilation-unit (SCU) builds, undefine all macros.

0 commit comments

Comments
 (0)