|
|
@@ -0,0 +1,123 @@
|
|
|
+From fa8f23284d4689c2a737204b337b58d966dcbd8c Mon Sep 17 00:00:00 2001
|
|
|
+From: Sean Parkinson <[email protected]>
|
|
|
+Date: Fri, 20 Aug 2021 10:23:38 +1000
|
|
|
+Subject: [PATCH] Maths x86 asm: change asm snippets to get compiling
|
|
|
+
|
|
|
+TFM:
|
|
|
+ Use register or memory for c0, c1, c2 in SQRADD and SQRADD2.
|
|
|
+SP:
|
|
|
+ Use register or memory for vl, vh, vo in SP_ASM_MUL_ADD,
|
|
|
+SP_ASM_MUL_ADD2 and SP_ASM_SQR_ADD.
|
|
|
+---
|
|
|
+ wolfcrypt/src/asm.c | 29 ++++++++++++++++++++---------
|
|
|
+ wolfcrypt/src/sp_int.c | 6 +++---
|
|
|
+ 2 files changed, 23 insertions(+), 12 deletions(-)
|
|
|
+
|
|
|
+diff --git a/wolfcrypt/src/asm.c b/wolfcrypt/src/asm.c
|
|
|
+index b7f53d073..a37e75e02 100644
|
|
|
+--- a/wolfcrypt/src/asm.c
|
|
|
++++ b/wolfcrypt/src/asm.c
|
|
|
+@@ -698,33 +698,39 @@ __asm__( \
|
|
|
+
|
|
|
+ #define SQRADD(i, j) \
|
|
|
+ __asm__( \
|
|
|
+- "movl %6,%%eax \n\t" \
|
|
|
++ "movl %3,%%eax \n\t" \
|
|
|
+ "mull %%eax \n\t" \
|
|
|
+ "addl %%eax,%0 \n\t" \
|
|
|
+ "adcl %%edx,%1 \n\t" \
|
|
|
+ "adcl $0,%2 \n\t" \
|
|
|
+- :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i) :"%eax","%edx","cc");
|
|
|
++ :"+rm"(c0), "+rm"(c1), "+rm"(c2) \
|
|
|
++ : "m"(i) \
|
|
|
++ :"%eax","%edx","cc");
|
|
|
+
|
|
|
+ #define SQRADD2(i, j) \
|
|
|
+ __asm__( \
|
|
|
+- "movl %6,%%eax \n\t" \
|
|
|
+- "mull %7 \n\t" \
|
|
|
++ "movl %3,%%eax \n\t" \
|
|
|
++ "mull %4 \n\t" \
|
|
|
+ "addl %%eax,%0 \n\t" \
|
|
|
+ "adcl %%edx,%1 \n\t" \
|
|
|
+ "adcl $0,%2 \n\t" \
|
|
|
+ "addl %%eax,%0 \n\t" \
|
|
|
+ "adcl %%edx,%1 \n\t" \
|
|
|
+ "adcl $0,%2 \n\t" \
|
|
|
+- :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i), "m"(j) :"%eax","%edx", "cc");
|
|
|
++ :"+rm"(c0), "+rm"(c1), "+rm"(c2) \
|
|
|
++ : "m"(i), "m"(j) \
|
|
|
++ :"%eax","%edx", "cc");
|
|
|
+
|
|
|
+ #define SQRADDSC(i, j) \
|
|
|
+-__asm__( \
|
|
|
++__asm__( \
|
|
|
+ "movl %3,%%eax \n\t" \
|
|
|
+ "mull %4 \n\t" \
|
|
|
+ "movl %%eax,%0 \n\t" \
|
|
|
+ "movl %%edx,%1 \n\t" \
|
|
|
+ "xorl %2,%2 \n\t" \
|
|
|
+- :"=r"(sc0), "=r"(sc1), "=r"(sc2): "g"(i), "g"(j) :"%eax","%edx","cc");
|
|
|
++ :"=r"(sc0), "=r"(sc1), "=r"(sc2) \
|
|
|
++ : "g"(i), "g"(j) \
|
|
|
++ :"%eax","%edx","cc");
|
|
|
+
|
|
|
+ #define SQRADDAC(i, j) \
|
|
|
+ __asm__( \
|
|
|
+@@ -733,7 +739,9 @@ __asm__( \
|
|
|
+ "addl %%eax,%0 \n\t" \
|
|
|
+ "adcl %%edx,%1 \n\t" \
|
|
|
+ "adcl $0,%2 \n\t" \
|
|
|
+- :"=r"(sc0), "=r"(sc1), "=r"(sc2): "0"(sc0), "1"(sc1), "2"(sc2), "g"(i), "g"(j) :"%eax","%edx","cc");
|
|
|
++ :"=r"(sc0), "=r"(sc1), "=r"(sc2) \
|
|
|
++ : "0"(sc0), "1"(sc1), "2"(sc2), "g"(i), "g"(j) \
|
|
|
++ :"%eax","%edx","cc");
|
|
|
+
|
|
|
+ #define SQRADDDB \
|
|
|
+ __asm__( \
|
|
|
+@@ -743,7 +751,10 @@ __asm__( \
|
|
|
+ "addl %6,%0 \n\t" \
|
|
|
+ "adcl %7,%1 \n\t" \
|
|
|
+ "adcl %8,%2 \n\t" \
|
|
|
+- :"=r"(c0), "=r"(c1), "=r"(c2) : "0"(c0), "1"(c1), "2"(c2), "r"(sc0), "r"(sc1), "r"(sc2) : "cc");
|
|
|
++ :"=r"(c0), "=r"(c1), "=r"(c2) \
|
|
|
++ : "0"(c0), "1"(c1), "2"(c2), "r"(sc0), "r"(sc1), \
|
|
|
++ "r"(sc2) \
|
|
|
++ : "cc");
|
|
|
+
|
|
|
+ #elif defined(TFM_X86_64)
|
|
|
+ /* x86-64 optimized */
|
|
|
+diff --git a/wolfcrypt/src/sp_int.c b/wolfcrypt/src/sp_int.c
|
|
|
+index 6070faaa9..d26702e47 100644
|
|
|
+--- a/wolfcrypt/src/sp_int.c
|
|
|
++++ b/wolfcrypt/src/sp_int.c
|
|
|
+@@ -477,7 +477,7 @@ static WC_INLINE sp_int_digit sp_div_word(sp_int_digit hi, sp_int_digit lo,
|
|
|
+ "addl %%eax, %[l] \n\t" \
|
|
|
+ "adcl %%edx, %[h] \n\t" \
|
|
|
+ "adcl $0 , %[o] \n\t" \
|
|
|
+- : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
|
|
|
++ : [l] "+rm" (vl), [h] "+rm" (vh), [o] "+rm" (vo) \
|
|
|
+ : [a] "r" (va), [b] "r" (vb) \
|
|
|
+ : "eax", "edx", "cc" \
|
|
|
+ )
|
|
|
+@@ -503,7 +503,7 @@ static WC_INLINE sp_int_digit sp_div_word(sp_int_digit hi, sp_int_digit lo,
|
|
|
+ "addl %%eax, %[l] \n\t" \
|
|
|
+ "adcl %%edx, %[h] \n\t" \
|
|
|
+ "adcl $0 , %[o] \n\t" \
|
|
|
+- : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
|
|
|
++ : [l] "+rm" (vl), [h] "+rm" (vh), [o] "+rm" (vo) \
|
|
|
+ : [a] "r" (va), [b] "r" (vb) \
|
|
|
+ : "eax", "edx", "cc" \
|
|
|
+ )
|
|
|
+@@ -542,7 +542,7 @@ static WC_INLINE sp_int_digit sp_div_word(sp_int_digit hi, sp_int_digit lo,
|
|
|
+ "addl %%eax, %[l] \n\t" \
|
|
|
+ "adcl %%edx, %[h] \n\t" \
|
|
|
+ "adcl $0 , %[o] \n\t" \
|
|
|
+- : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
|
|
|
++ : [l] "+rm" (vl), [h] "+rm" (vh), [o] "+rm" (vo) \
|
|
|
+ : [a] "m" (va) \
|
|
|
+ : "eax", "edx", "cc" \
|
|
|
+ )
|
|
|
+--
|
|
|
+2.31.1
|
|
|
+
|