sync code with last fixes and improvements from OpenBSD
This commit is contained in:
parent
8f31919cdb
commit
25f3a6cfac
76 changed files with 1289 additions and 694 deletions
|
@ -1,4 +1,4 @@
|
|||
/* $OpenBSD: bn_arch.h,v 1.9 2023/05/28 17:42:30 jsing Exp $ */
|
||||
/* $OpenBSD: bn_arch.h,v 1.10 2023/06/12 16:42:11 jsing Exp $ */
|
||||
/*
|
||||
* Copyright (c) 2023 Joel Sing <jsing@openbsd.org>
|
||||
*
|
||||
|
@ -63,6 +63,35 @@ bn_addw_addw(BN_ULONG a, BN_ULONG b, BN_ULONG c, BN_ULONG *out_r1,
|
|||
*out_r0 = r0;
|
||||
}
|
||||
|
||||
#define HAVE_BN_QWADDQW
|
||||
|
||||
/*
 * Inline assembly implementation of bn_qwaddqw(): adds the quad words
 * (a3:a2:a1:a0) and (b3:b2:b1:b0) plus an incoming carry with a single
 * add-with-carry chain (AArch64 instructions, note the xzr register).
 * The four sum words are stored through out_r3..out_r0 and the carry out
 * of the most significant word (0 or 1) is stored through out_carry.
 * Any non-zero incoming carry acts as a carry in of one.
 */
static inline void
|
||||
bn_qwaddqw(BN_ULONG a3, BN_ULONG a2, BN_ULONG a1, BN_ULONG a0, BN_ULONG b3,
|
||||
BN_ULONG b2, BN_ULONG b1, BN_ULONG b0, BN_ULONG carry, BN_ULONG *out_carry,
|
||||
BN_ULONG *out_r3, BN_ULONG *out_r2, BN_ULONG *out_r1, BN_ULONG *out_r0)
|
||||
{
|
||||
BN_ULONG r3, r2, r1, r0;
|
||||
|
||||
__asm__ (
|
||||
"adds xzr, %[carry], #-1 \n" /* C flag = (carry != 0); the sum itself is discarded */
|
||||
"adcs %[r0], %[a0], %[b0] \n" /* r0 = a0 + b0 + C, updates C */
|
||||
"adcs %[r1], %[a1], %[b1] \n" /* r1 = a1 + b1 + C, updates C */
|
||||
"adcs %[r2], %[a2], %[b2] \n" /* r2 = a2 + b2 + C, updates C */
|
||||
"adcs %[r3], %[a3], %[b3] \n" /* r3 = a3 + b3 + C, updates C */
|
||||
"cset %[carry], cs \n" /* carry = 1 if C is set, otherwise 0 */
|
||||
: [carry]"+r"(carry), [r3]"=&r"(r3), [r2]"=&r"(r2), /* "&": results are written before all inputs have been read */
|
||||
[r1]"=&r"(r1), [r0]"=&r"(r0)
|
||||
: [a3]"r"(a3), [a2]"r"(a2), [a1]"r"(a1), [a0]"r"(a0),
|
||||
[b3]"r"(b3), [b2]"r"(b2), [b1]"r"(b1), [b0]"r"(b0)
|
||||
: "cc"); /* the condition flags are modified */
|
||||
|
||||
*out_carry = carry;
|
||||
*out_r3 = r3;
|
||||
*out_r2 = r2;
|
||||
*out_r1 = r1;
|
||||
*out_r0 = r0;
|
||||
}
|
||||
|
||||
#define HAVE_BN_MULW
|
||||
|
||||
static inline void
|
||||
|
@ -148,6 +177,83 @@ bn_mulw_addtw(BN_ULONG a, BN_ULONG b, BN_ULONG c2, BN_ULONG c1, BN_ULONG c0,
|
|||
*out_r0 = r0;
|
||||
}
|
||||
|
||||
#define HAVE_BN_QWMULW_ADDW
|
||||
|
||||
/*
 * Inline assembly implementation of bn_qwmulw_addw(): multiplies the quad
 * word (a3:a2:a1:a0) by the single word b and adds the single word c
 * (AArch64 instructions). The five result words are stored through
 * out_r4..out_r0. The low half of each partial product is added to the
 * high half of the previous one in one carry chain; c doubles as the
 * scratch register for the low halves once its input value has been used.
 */
static inline void
|
||||
bn_qwmulw_addw(BN_ULONG a3, BN_ULONG a2, BN_ULONG a1, BN_ULONG a0, BN_ULONG b,
|
||||
BN_ULONG c, BN_ULONG *out_r4, BN_ULONG *out_r3, BN_ULONG *out_r2,
|
||||
BN_ULONG *out_r1, BN_ULONG *out_r0)
|
||||
{
|
||||
BN_ULONG r4, r3, r2, r1, r0;
|
||||
|
||||
__asm__ (
|
||||
"umulh %[r1], %[a0], %[b] \n" /* r1 = high word of a0 * b */
|
||||
"mul %[r0], %[a0], %[b] \n" /* r0 = low word of a0 * b */
|
||||
"adds %[r0], %[r0], %[c] \n" /* r0 += c, starts the carry chain */
|
||||
"umulh %[r2], %[a1], %[b] \n" /* r2 = high word of a1 * b */
|
||||
"mul %[c], %[a1], %[b] \n" /* c (scratch) = low word of a1 * b */
|
||||
"adcs %[r1], %[r1], %[c] \n" /* r1 += c + C */
|
||||
"umulh %[r3], %[a2], %[b] \n" /* r3 = high word of a2 * b */
|
||||
"mul %[c], %[a2], %[b] \n" /* c (scratch) = low word of a2 * b */
|
||||
"adcs %[r2], %[r2], %[c] \n" /* r2 += c + C */
|
||||
"umulh %[r4], %[a3], %[b] \n" /* r4 = high word of a3 * b */
|
||||
"mul %[c], %[a3], %[b] \n" /* c (scratch) = low word of a3 * b */
|
||||
"adcs %[r3], %[r3], %[c] \n" /* r3 += c + C */
|
||||
"adc %[r4], %[r4], xzr \n" /* r4 += C, end of the chain */
|
||||
: [c]"+r"(c), [r4]"=&r"(r4), [r3]"=&r"(r3), [r2]"=&r"(r2), /* "&": results are written before all inputs have been read */
|
||||
[r1]"=&r"(r1), [r0]"=&r"(r0)
|
||||
: [a3]"r"(a3), [a2]"r"(a2), [a1]"r"(a1), [a0]"r"(a0), [b]"r"(b)
|
||||
: "cc"); /* the condition flags are modified */
|
||||
|
||||
*out_r4 = r4;
|
||||
*out_r3 = r3;
|
||||
*out_r2 = r2;
|
||||
*out_r1 = r1;
|
||||
*out_r0 = r0;
|
||||
}
|
||||
|
||||
#define HAVE_BN_QWMULW_ADDQW_ADDW
|
||||
|
||||
/*
 * Inline assembly implementation of bn_qwmulw_addqw_addw(): computes
 * (a3:a2:a1:a0) * b + (c3:c2:c1:c0) + d (AArch64 instructions) and stores
 * the five result words through out_r4..out_r0. The work is done in two
 * carry chains: the first forms a * b + d, the second adds the quad word c.
 * d doubles as the scratch register for the low product halves once its
 * input value has been used.
 */
static inline void
|
||||
bn_qwmulw_addqw_addw(BN_ULONG a3, BN_ULONG a2, BN_ULONG a1, BN_ULONG a0,
|
||||
BN_ULONG b, BN_ULONG c3, BN_ULONG c2, BN_ULONG c1, BN_ULONG c0, BN_ULONG d,
|
||||
BN_ULONG *out_r4, BN_ULONG *out_r3, BN_ULONG *out_r2, BN_ULONG *out_r1,
|
||||
BN_ULONG *out_r0)
|
||||
{
|
||||
BN_ULONG r4, r3, r2, r1, r0;
|
||||
|
||||
__asm__ (
|
||||
"umulh %[r1], %[a0], %[b] \n" /* r1 = high word of a0 * b */
|
||||
"mul %[r0], %[a0], %[b] \n" /* r0 = low word of a0 * b */
|
||||
"adds %[r0], %[r0], %[d] \n" /* r0 += d, starts the first carry chain */
|
||||
"umulh %[r2], %[a1], %[b] \n" /* r2 = high word of a1 * b */
|
||||
"mul %[d], %[a1], %[b] \n" /* d (scratch) = low word of a1 * b */
|
||||
"adcs %[r1], %[r1], %[d] \n" /* r1 += d + C */
|
||||
"umulh %[r3], %[a2], %[b] \n" /* r3 = high word of a2 * b */
|
||||
"mul %[d], %[a2], %[b] \n" /* d (scratch) = low word of a2 * b */
|
||||
"adcs %[r2], %[r2], %[d] \n" /* r2 += d + C */
|
||||
"umulh %[r4], %[a3], %[b] \n" /* r4 = high word of a3 * b */
|
||||
"mul %[d], %[a3], %[b] \n" /* d (scratch) = low word of a3 * b */
|
||||
"adcs %[r3], %[r3], %[d] \n" /* r3 += d + C */
|
||||
"adc %[r4], %[r4], xzr \n" /* r4 += C, end of the first chain */
|
||||
"adds %[r0], %[r0], %[c0] \n" /* r0 += c0, starts the second carry chain */
|
||||
"adcs %[r1], %[r1], %[c1] \n" /* r1 += c1 + C */
|
||||
"adcs %[r2], %[r2], %[c2] \n" /* r2 += c2 + C */
|
||||
"adcs %[r3], %[r3], %[c3] \n" /* r3 += c3 + C */
|
||||
"adc %[r4], %[r4], xzr \n" /* r4 += C, end of the second chain */
|
||||
: [d]"+r"(d), [r4]"=&r"(r4), [r3]"=&r"(r3), [r2]"=&r"(r2), /* "&": results are written before all inputs have been read */
|
||||
[r1]"=&r"(r1), [r0]"=&r"(r0)
|
||||
: [a3]"r"(a3), [a2]"r"(a2), [a1]"r"(a1), [a0]"r"(a0), [b]"r"(b),
|
||||
[c3]"r"(c3), [c2]"r"(c2), [c1]"r"(c1), [c0]"r"(c0)
|
||||
: "cc"); /* the condition flags are modified */
|
||||
|
||||
*out_r4 = r4;
|
||||
*out_r3 = r3;
|
||||
*out_r2 = r2;
|
||||
*out_r1 = r1;
|
||||
*out_r0 = r0;
|
||||
}
|
||||
|
||||
#define HAVE_BN_SUBW
|
||||
|
||||
static inline void
|
||||
|
@ -187,6 +293,35 @@ bn_subw_subw(BN_ULONG a, BN_ULONG b, BN_ULONG c, BN_ULONG *out_borrow,
|
|||
*out_r0 = r0;
|
||||
}
|
||||
|
||||
#define HAVE_BN_QWSUBQW
|
||||
|
||||
/*
 * Inline assembly implementation of bn_qwsubqw(): subtracts the quad word
 * (b3:b2:b1:b0) and an incoming borrow from (a3:a2:a1:a0) with a single
 * subtract-with-carry chain (AArch64 instructions). The four difference
 * words are stored through out_r3..out_r0 and the borrow out of the most
 * significant word (0 or 1) is stored through out_borrow.
 * On this instruction set a clear C flag means "borrow", which is why the
 * flag is seeded from 0 - borrow and read back with the "cc" condition.
 */
static inline void
|
||||
bn_qwsubqw(BN_ULONG a3, BN_ULONG a2, BN_ULONG a1, BN_ULONG a0, BN_ULONG b3,
|
||||
BN_ULONG b2, BN_ULONG b1, BN_ULONG b0, BN_ULONG borrow, BN_ULONG *out_borrow,
|
||||
BN_ULONG *out_r3, BN_ULONG *out_r2, BN_ULONG *out_r1, BN_ULONG *out_r0)
|
||||
{
|
||||
BN_ULONG r3, r2, r1, r0;
|
||||
|
||||
__asm__ (
|
||||
"subs xzr, xzr, %[borrow] \n" /* C flag = (borrow == 0); the difference is discarded */
|
||||
"sbcs %[r0], %[a0], %[b0] \n" /* r0 = a0 - b0 - !C, updates C */
|
||||
"sbcs %[r1], %[a1], %[b1] \n" /* r1 = a1 - b1 - !C, updates C */
|
||||
"sbcs %[r2], %[a2], %[b2] \n" /* r2 = a2 - b2 - !C, updates C */
|
||||
"sbcs %[r3], %[a3], %[b3] \n" /* r3 = a3 - b3 - !C, updates C */
|
||||
"cset %[borrow], cc \n" /* borrow = 1 if C is clear, otherwise 0 */
|
||||
: [borrow]"+r"(borrow), [r3]"=&r"(r3), [r2]"=&r"(r2), /* "&": results are written before all inputs have been read */
|
||||
[r1]"=&r"(r1), [r0]"=&r"(r0)
|
||||
: [a3]"r"(a3), [a2]"r"(a2), [a1]"r"(a1), [a0]"r"(a0),
|
||||
[b3]"r"(b3), [b2]"r"(b2), [b1]"r"(b1), [b0]"r"(b0)
|
||||
: "cc"); /* the condition flags are modified */
|
||||
|
||||
*out_borrow = borrow;
|
||||
*out_r3 = r3;
|
||||
*out_r2 = r2;
|
||||
*out_r1 = r1;
|
||||
*out_r0 = r0;
|
||||
}
|
||||
|
||||
#endif /* __GNUC__ */
|
||||
|
||||
#endif
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
/* $OpenBSD: bn_add.c,v 1.24 2023/02/22 05:46:37 jsing Exp $ */
|
||||
/* $OpenBSD: bn_add.c,v 1.25 2023/06/12 16:17:24 jsing Exp $ */
|
||||
/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
|
||||
* All rights reserved.
|
||||
*
|
||||
|
@ -80,18 +80,14 @@ bn_add_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int n)
|
|||
if (n <= 0)
|
||||
return 0;
|
||||
|
||||
#ifndef OPENSSL_SMALL_FOOTPRINT
|
||||
while (n & ~3) {
|
||||
bn_addw_addw(a[0], b[0], carry, &carry, &r[0]);
|
||||
bn_addw_addw(a[1], b[1], carry, &carry, &r[1]);
|
||||
bn_addw_addw(a[2], b[2], carry, &carry, &r[2]);
|
||||
bn_addw_addw(a[3], b[3], carry, &carry, &r[3]);
|
||||
bn_qwaddqw(a[3], a[2], a[1], a[0], b[3], b[2], b[1], b[0],
|
||||
carry, &carry, &r[3], &r[2], &r[1], &r[0]);
|
||||
a += 4;
|
||||
b += 4;
|
||||
r += 4;
|
||||
n -= 4;
|
||||
}
|
||||
#endif
|
||||
while (n) {
|
||||
bn_addw_addw(a[0], b[0], carry, &carry, &r[0]);
|
||||
a++;
|
||||
|
@ -165,18 +161,14 @@ bn_sub_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int n)
|
|||
if (n <= 0)
|
||||
return 0;
|
||||
|
||||
#ifndef OPENSSL_SMALL_FOOTPRINT
|
||||
while (n & ~3) {
|
||||
bn_subw_subw(a[0], b[0], borrow, &borrow, &r[0]);
|
||||
bn_subw_subw(a[1], b[1], borrow, &borrow, &r[1]);
|
||||
bn_subw_subw(a[2], b[2], borrow, &borrow, &r[2]);
|
||||
bn_subw_subw(a[3], b[3], borrow, &borrow, &r[3]);
|
||||
bn_qwsubqw(a[3], a[2], a[1], a[0], b[3], b[2], b[1], b[0],
|
||||
borrow, &borrow, &r[3], &r[2], &r[1], &r[0]);
|
||||
a += 4;
|
||||
b += 4;
|
||||
r += 4;
|
||||
n -= 4;
|
||||
}
|
||||
#endif
|
||||
while (n) {
|
||||
bn_subw_subw(a[0], b[0], borrow, &borrow, &r[0]);
|
||||
a++;
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
/* $OpenBSD: bn_internal.h,v 1.11 2023/03/07 09:35:55 jsing Exp $ */
|
||||
/* $OpenBSD: bn_internal.h,v 1.12 2023/06/12 16:17:24 jsing Exp $ */
|
||||
/*
|
||||
* Copyright (c) 2023 Joel Sing <jsing@openbsd.org>
|
||||
*
|
||||
|
@ -122,6 +122,33 @@ bn_addw_addw(BN_ULONG a, BN_ULONG b, BN_ULONG c, BN_ULONG *out_r1,
|
|||
}
|
||||
#endif
|
||||
|
||||
/*
|
||||
* bn_qwaddqw() computes
|
||||
* (carry:r3:r2:r1:r0) = (a3:a2:a1:a0) + (b3:b2:b1:b0) + carry, where a is a quad word,
|
||||
* b is a quad word, and carry is a single word with value 0 or 1, producing a four
|
||||
* word result and carry.
|
||||
*/
|
||||
#ifndef HAVE_BN_QWADDQW
|
||||
static inline void
|
||||
bn_qwaddqw(BN_ULONG a3, BN_ULONG a2, BN_ULONG a1, BN_ULONG a0, BN_ULONG b3,
|
||||
BN_ULONG b2, BN_ULONG b1, BN_ULONG b0, BN_ULONG carry, BN_ULONG *out_carry,
|
||||
BN_ULONG *out_r3, BN_ULONG *out_r2, BN_ULONG *out_r1, BN_ULONG *out_r0)
|
||||
{
|
||||
BN_ULONG r3, r2, r1, r0;
|
||||
|
||||
bn_addw_addw(a0, b0, carry, &carry, &r0);
|
||||
bn_addw_addw(a1, b1, carry, &carry, &r1);
|
||||
bn_addw_addw(a2, b2, carry, &carry, &r2);
|
||||
bn_addw_addw(a3, b3, carry, &carry, &r3);
|
||||
|
||||
*out_carry = carry;
|
||||
*out_r3 = r3;
|
||||
*out_r2 = r2;
|
||||
*out_r1 = r1;
|
||||
*out_r0 = r0;
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
|
||||
* bn_subw() computes r0 = a - b, where both inputs are single words,
|
||||
* producing a single word result and borrow.
|
||||
|
@ -159,6 +186,33 @@ bn_subw_subw(BN_ULONG a, BN_ULONG b, BN_ULONG c, BN_ULONG *out_borrow,
|
|||
}
|
||||
#endif
|
||||
|
||||
/*
|
||||
* bn_qwsubqw() computes
|
||||
* (r3:r2:r1:r0) = (a3:a2:a1:a0) - (b3:b2:b1:b0) - borrow, where a is a quad word,
|
||||
* b is a quad word, and borrow is a single word with value 0 or 1, producing a
|
||||
* four word result and borrow.
|
||||
*/
|
||||
#ifndef HAVE_BN_QWSUBQW
|
||||
static inline void
|
||||
bn_qwsubqw(BN_ULONG a3, BN_ULONG a2, BN_ULONG a1, BN_ULONG a0, BN_ULONG b3,
|
||||
BN_ULONG b2, BN_ULONG b1, BN_ULONG b0, BN_ULONG borrow, BN_ULONG *out_borrow,
|
||||
BN_ULONG *out_r3, BN_ULONG *out_r2, BN_ULONG *out_r1, BN_ULONG *out_r0)
|
||||
{
|
||||
BN_ULONG r3, r2, r1, r0;
|
||||
|
||||
bn_subw_subw(a0, b0, borrow, &borrow, &r0);
|
||||
bn_subw_subw(a1, b1, borrow, &borrow, &r1);
|
||||
bn_subw_subw(a2, b2, borrow, &borrow, &r2);
|
||||
bn_subw_subw(a3, b3, borrow, &borrow, &r3);
|
||||
|
||||
*out_borrow = borrow;
|
||||
*out_r3 = r3;
|
||||
*out_r2 = r2;
|
||||
*out_r1 = r1;
|
||||
*out_r0 = r0;
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
|
||||
* bn_mulw() computes (r1:r0) = a * b, where both inputs are single words,
|
||||
* producing a double word result.
|
||||
|
@ -387,4 +441,58 @@ bn_mul2_mulw_addtw(BN_ULONG a, BN_ULONG b, BN_ULONG c2, BN_ULONG c1, BN_ULONG c0
|
|||
}
|
||||
#endif
|
||||
|
||||
/*
|
||||
* bn_qwmulw_addw() computes (r4:r3:r2:r1:r0) = (a3:a2:a1:a0) * b + c, where a
|
||||
* is a quad word, b is a single word and c is a single word, producing a five
|
||||
* word result.
|
||||
*/
|
||||
#ifndef HAVE_BN_QWMULW_ADDW
|
||||
static inline void
|
||||
bn_qwmulw_addw(BN_ULONG a3, BN_ULONG a2, BN_ULONG a1, BN_ULONG a0, BN_ULONG b,
|
||||
BN_ULONG c, BN_ULONG *out_r4, BN_ULONG *out_r3, BN_ULONG *out_r2,
|
||||
BN_ULONG *out_r1, BN_ULONG *out_r0)
|
||||
{
|
||||
BN_ULONG r3, r2, r1, r0;
|
||||
|
||||
bn_mulw_addw(a0, b, c, &c, &r0);
|
||||
bn_mulw_addw(a1, b, c, &c, &r1);
|
||||
bn_mulw_addw(a2, b, c, &c, &r2);
|
||||
bn_mulw_addw(a3, b, c, &c, &r3);
|
||||
|
||||
*out_r4 = c;
|
||||
*out_r3 = r3;
|
||||
*out_r2 = r2;
|
||||
*out_r1 = r1;
|
||||
*out_r0 = r0;
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
|
||||
* bn_qwmulw_addqw_addw() computes
|
||||
* (r4:r3:r2:r1:r0) = (a3:a2:a1:a0) * b + (c3:c2:c1:c0) + d, where a
|
||||
* is a quad word, b is a single word, c is a quad word, and d is a single word,
|
||||
* producing a five word result.
|
||||
*/
|
||||
#ifndef HAVE_BN_QWMULW_ADDQW_ADDW
|
||||
static inline void
|
||||
bn_qwmulw_addqw_addw(BN_ULONG a3, BN_ULONG a2, BN_ULONG a1, BN_ULONG a0,
|
||||
BN_ULONG b, BN_ULONG c3, BN_ULONG c2, BN_ULONG c1, BN_ULONG c0, BN_ULONG d,
|
||||
BN_ULONG *out_r4, BN_ULONG *out_r3, BN_ULONG *out_r2, BN_ULONG *out_r1,
|
||||
BN_ULONG *out_r0)
|
||||
{
|
||||
BN_ULONG r3, r2, r1, r0;
|
||||
|
||||
bn_mulw_addw_addw(a0, b, c0, d, &d, &r0);
|
||||
bn_mulw_addw_addw(a1, b, c1, d, &d, &r1);
|
||||
bn_mulw_addw_addw(a2, b, c2, d, &d, &r2);
|
||||
bn_mulw_addw_addw(a3, b, c3, d, &d, &r3);
|
||||
|
||||
*out_r4 = d;
|
||||
*out_r3 = r3;
|
||||
*out_r2 = r2;
|
||||
*out_r1 = r1;
|
||||
*out_r0 = r0;
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
/* $OpenBSD: bn_mul.c,v 1.37 2023/04/19 10:51:22 jsing Exp $ */
|
||||
/* $OpenBSD: bn_mul.c,v 1.38 2023/06/12 16:17:24 jsing Exp $ */
|
||||
/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
|
||||
* All rights reserved.
|
||||
*
|
||||
|
@ -210,17 +210,13 @@ bn_mul_words(BN_ULONG *r, const BN_ULONG *a, int num, BN_ULONG w)
|
|||
if (num <= 0)
|
||||
return 0;
|
||||
|
||||
#ifndef OPENSSL_SMALL_FOOTPRINT
|
||||
while (num & ~3) {
|
||||
bn_mulw_addw(a[0], w, carry, &carry, &r[0]);
|
||||
bn_mulw_addw(a[1], w, carry, &carry, &r[1]);
|
||||
bn_mulw_addw(a[2], w, carry, &carry, &r[2]);
|
||||
bn_mulw_addw(a[3], w, carry, &carry, &r[3]);
|
||||
bn_qwmulw_addw(a[3], a[2], a[1], a[0], w, carry, &carry,
|
||||
&r[3], &r[2], &r[1], &r[0]);
|
||||
a += 4;
|
||||
r += 4;
|
||||
num -= 4;
|
||||
}
|
||||
#endif
|
||||
while (num) {
|
||||
bn_mulw_addw(a[0], w, carry, &carry, &r[0]);
|
||||
a++;
|
||||
|
@ -247,17 +243,14 @@ bn_mul_add_words(BN_ULONG *r, const BN_ULONG *a, int num, BN_ULONG w)
|
|||
if (num <= 0)
|
||||
return 0;
|
||||
|
||||
#ifndef OPENSSL_SMALL_FOOTPRINT
|
||||
while (num & ~3) {
|
||||
bn_mulw_addw_addw(a[0], w, r[0], carry, &carry, &r[0]);
|
||||
bn_mulw_addw_addw(a[1], w, r[1], carry, &carry, &r[1]);
|
||||
bn_mulw_addw_addw(a[2], w, r[2], carry, &carry, &r[2]);
|
||||
bn_mulw_addw_addw(a[3], w, r[3], carry, &carry, &r[3]);
|
||||
bn_qwmulw_addqw_addw(a[3], a[2], a[1], a[0], w,
|
||||
r[3], r[2], r[1], r[0], carry, &carry,
|
||||
&r[3], &r[2], &r[1], &r[0]);
|
||||
a += 4;
|
||||
r += 4;
|
||||
num -= 4;
|
||||
}
|
||||
#endif
|
||||
while (num) {
|
||||
bn_mulw_addw_addw(a[0], w, r[0], carry, &carry, &r[0]);
|
||||
a++;
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue