quickjs-tart

quickjs-based runtime for wallet-core logic

bn_mul.h (44706B)


      1 /**
      2  * \file bn_mul.h
      3  *
      4  * \brief Multi-precision integer library
      5  */
      6 /*
      7  *  Copyright The Mbed TLS Contributors
      8  *  SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
      9  */
     10 /*
     11  *      Multiply source vector [s] with b, add result
     12  *       to destination vector [d] and set carry c.
     13  *
     14  *      Currently supports:
     15  *
     16  *         . IA-32 (386+)         . AMD64 / EM64T
     17  *         . IA-32 (SSE2)         . Motorola 68000
     18  *         . PowerPC, 32-bit      . MicroBlaze
     19  *         . PowerPC, 64-bit      . TriCore
     20  *         . SPARC v8             . ARM v3+
     21  *         . Alpha                . MIPS32
     22  *         . C, longlong          . C, generic
     23  */
     24 #ifndef MBEDTLS_BN_MUL_H
     25 #define MBEDTLS_BN_MUL_H
     26 
     27 #include "mbedtls/build_info.h"
     28 
     29 #include "mbedtls/bignum.h"
     30 
     31 
     32 /*
     33  * Conversion macros for embedded constants:
     34  * build lists of mbedtls_mpi_uint values from lists of unsigned char values grouped by 8, 4 or 2
     35  */
     36 #if defined(MBEDTLS_HAVE_INT32)
     37 
     38 #define MBEDTLS_BYTES_TO_T_UINT_4(a, b, c, d)               \
     39     ((mbedtls_mpi_uint) (a) <<  0) |                        \
     40     ((mbedtls_mpi_uint) (b) <<  8) |                        \
     41     ((mbedtls_mpi_uint) (c) << 16) |                        \
     42     ((mbedtls_mpi_uint) (d) << 24)
     43 
     44 #define MBEDTLS_BYTES_TO_T_UINT_2(a, b)                   \
     45     MBEDTLS_BYTES_TO_T_UINT_4(a, b, 0, 0)
     46 
     47 #define MBEDTLS_BYTES_TO_T_UINT_8(a, b, c, d, e, f, g, h) \
     48     MBEDTLS_BYTES_TO_T_UINT_4(a, b, c, d),                \
     49     MBEDTLS_BYTES_TO_T_UINT_4(e, f, g, h)
     50 
     51 #else /* 64-bits */
     52 
     53 #define MBEDTLS_BYTES_TO_T_UINT_8(a, b, c, d, e, f, g, h)   \
     54     ((mbedtls_mpi_uint) (a) <<  0) |                        \
     55     ((mbedtls_mpi_uint) (b) <<  8) |                        \
     56     ((mbedtls_mpi_uint) (c) << 16) |                        \
     57     ((mbedtls_mpi_uint) (d) << 24) |                        \
     58     ((mbedtls_mpi_uint) (e) << 32) |                        \
     59     ((mbedtls_mpi_uint) (f) << 40) |                        \
     60     ((mbedtls_mpi_uint) (g) << 48) |                        \
     61     ((mbedtls_mpi_uint) (h) << 56)
     62 
     63 #define MBEDTLS_BYTES_TO_T_UINT_4(a, b, c, d)             \
     64     MBEDTLS_BYTES_TO_T_UINT_8(a, b, c, d, 0, 0, 0, 0)
     65 
     66 #define MBEDTLS_BYTES_TO_T_UINT_2(a, b)                   \
     67     MBEDTLS_BYTES_TO_T_UINT_8(a, b, 0, 0, 0, 0, 0, 0)
     68 
     69 #endif /* bits in mbedtls_mpi_uint */
     70 
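These conversion macros are typically used to embed large constants, such as elliptic-curve parameters, as little-endian arrays of limbs; the real users live in files like ecp_curves.c. A minimal sketch with an illustrative value (not part of this header):

    /* Illustrative only: the 128-bit value 0x0123456789ABCDEFFEDCBA9876543210,
     * stored least-significant limb first. Bytes are listed least-significant
     * first within each group of 8, so the macros yield the correct limbs on
     * both 32-bit and 64-bit builds. */
    static const mbedtls_mpi_uint example_const[] = {
        MBEDTLS_BYTES_TO_T_UINT_8(0x10, 0x32, 0x54, 0x76, 0x98, 0xBA, 0xDC, 0xFE),
        MBEDTLS_BYTES_TO_T_UINT_8(0xEF, 0xCD, 0xAB, 0x89, 0x67, 0x45, 0x23, 0x01),
    };
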
     71 /* *INDENT-OFF* */
     72 #if defined(MBEDTLS_HAVE_ASM)
     73 
     74 /* armcc5 --gnu defines __GNUC__ but doesn't support GNU's extended asm */
     75 #if defined(__GNUC__) && \
     76     ( !defined(__ARMCC_VERSION) || __ARMCC_VERSION >= 6000000 )
     77 
     78 /*
     79  * GCC < 5.0 treated the x86 ebx (which is used for the GOT) as a
     80  * fixed reserved register when building as PIC, leading to errors
     81  * like: bn_mul.h:46:13: error: PIC register clobbered by 'ebx' in 'asm'
     82  *
     83  * This is fixed by an improved register allocator in GCC 5+. From the
     84  * release notes:
     85  * Register allocation improvements: Reuse of the PIC hard register,
     86  * instead of using a fixed register, was implemented on x86/x86-64
     87  * targets. This improves generated PIC code performance as more hard
     88  * registers can be used.
     89  */
     90 #if defined(__GNUC__) && __GNUC__ < 5 && defined(__PIC__)
     91 #define MULADDC_CANNOT_USE_EBX
     92 #endif
     93 
     94 /*
     95  * Disable use of the i386 assembly code below if the -O0 option, which
     96  * disables all compiler optimisations, is passed; this is detected with
     97  * __OPTIMIZE__. The assembly code below uses more registers than the
     98  * compiler can spare when optimisation is disabled.
     99  */
    100 #if defined(__i386__) && defined(__OPTIMIZE__) && !defined(MULADDC_CANNOT_USE_EBX)
    101 
    102 #define MULADDC_X1_INIT                     \
    103     { mbedtls_mpi_uint t;                   \
    104     asm(                                    \
    105         "movl   %%ebx, %0           \n\t"   \
    106         "movl   %5, %%esi           \n\t"   \
    107         "movl   %6, %%edi           \n\t"   \
    108         "movl   %7, %%ecx           \n\t"   \
    109         "movl   %8, %%ebx           \n\t"
    110 
    111 #define MULADDC_X1_CORE                     \
    112         "lodsl                      \n\t"   \
    113         "mull   %%ebx               \n\t"   \
    114         "addl   %%ecx,   %%eax      \n\t"   \
    115         "adcl   $0,      %%edx      \n\t"   \
    116         "addl   (%%edi), %%eax      \n\t"   \
    117         "adcl   $0,      %%edx      \n\t"   \
    118         "movl   %%edx,   %%ecx      \n\t"   \
    119         "stosl                      \n\t"
    120 
    121 #define MULADDC_X1_STOP                                 \
    122         "movl   %4, %%ebx       \n\t"                   \
    123         "movl   %%ecx, %1       \n\t"                   \
    124         "movl   %%edi, %2       \n\t"                   \
    125         "movl   %%esi, %3       \n\t"                   \
    126         : "=m" (t), "=m" (c), "=m" (d), "=m" (s)        \
    127         : "m" (t), "m" (s), "m" (d), "m" (c), "m" (b)   \
    128         : "eax", "ebx", "ecx", "edx", "esi", "edi"      \
    129     ); }
    130 
    131 #if defined(MBEDTLS_HAVE_SSE2)
    132 
    133 #define MULADDC_X8_INIT MULADDC_X1_INIT
    134 
    135 #define MULADDC_X8_CORE                         \
    136         "movd     %%ecx,     %%mm1      \n\t"   \
    137         "movd     %%ebx,     %%mm0      \n\t"   \
    138         "movd     (%%edi),   %%mm3      \n\t"   \
    139         "paddq    %%mm3,     %%mm1      \n\t"   \
    140         "movd     (%%esi),   %%mm2      \n\t"   \
    141         "pmuludq  %%mm0,     %%mm2      \n\t"   \
    142         "movd     4(%%esi),  %%mm4      \n\t"   \
    143         "pmuludq  %%mm0,     %%mm4      \n\t"   \
    144         "movd     8(%%esi),  %%mm6      \n\t"   \
    145         "pmuludq  %%mm0,     %%mm6      \n\t"   \
    146         "movd     12(%%esi), %%mm7      \n\t"   \
    147         "pmuludq  %%mm0,     %%mm7      \n\t"   \
    148         "paddq    %%mm2,     %%mm1      \n\t"   \
    149         "movd     4(%%edi),  %%mm3      \n\t"   \
    150         "paddq    %%mm4,     %%mm3      \n\t"   \
    151         "movd     8(%%edi),  %%mm5      \n\t"   \
    152         "paddq    %%mm6,     %%mm5      \n\t"   \
    153         "movd     12(%%edi), %%mm4      \n\t"   \
    154         "paddq    %%mm4,     %%mm7      \n\t"   \
    155         "movd     %%mm1,     (%%edi)    \n\t"   \
    156         "movd     16(%%esi), %%mm2      \n\t"   \
    157         "pmuludq  %%mm0,     %%mm2      \n\t"   \
    158         "psrlq    $32,       %%mm1      \n\t"   \
    159         "movd     20(%%esi), %%mm4      \n\t"   \
    160         "pmuludq  %%mm0,     %%mm4      \n\t"   \
    161         "paddq    %%mm3,     %%mm1      \n\t"   \
    162         "movd     24(%%esi), %%mm6      \n\t"   \
    163         "pmuludq  %%mm0,     %%mm6      \n\t"   \
    164         "movd     %%mm1,     4(%%edi)   \n\t"   \
    165         "psrlq    $32,       %%mm1      \n\t"   \
    166         "movd     28(%%esi), %%mm3      \n\t"   \
    167         "pmuludq  %%mm0,     %%mm3      \n\t"   \
    168         "paddq    %%mm5,     %%mm1      \n\t"   \
    169         "movd     16(%%edi), %%mm5      \n\t"   \
    170         "paddq    %%mm5,     %%mm2      \n\t"   \
    171         "movd     %%mm1,     8(%%edi)   \n\t"   \
    172         "psrlq    $32,       %%mm1      \n\t"   \
    173         "paddq    %%mm7,     %%mm1      \n\t"   \
    174         "movd     20(%%edi), %%mm5      \n\t"   \
    175         "paddq    %%mm5,     %%mm4      \n\t"   \
    176         "movd     %%mm1,     12(%%edi)  \n\t"   \
    177         "psrlq    $32,       %%mm1      \n\t"   \
    178         "paddq    %%mm2,     %%mm1      \n\t"   \
    179         "movd     24(%%edi), %%mm5      \n\t"   \
    180         "paddq    %%mm5,     %%mm6      \n\t"   \
    181         "movd     %%mm1,     16(%%edi)  \n\t"   \
    182         "psrlq    $32,       %%mm1      \n\t"   \
    183         "paddq    %%mm4,     %%mm1      \n\t"   \
    184         "movd     28(%%edi), %%mm5      \n\t"   \
    185         "paddq    %%mm5,     %%mm3      \n\t"   \
    186         "movd     %%mm1,     20(%%edi)  \n\t"   \
    187         "psrlq    $32,       %%mm1      \n\t"   \
    188         "paddq    %%mm6,     %%mm1      \n\t"   \
    189         "movd     %%mm1,     24(%%edi)  \n\t"   \
    190         "psrlq    $32,       %%mm1      \n\t"   \
    191         "paddq    %%mm3,     %%mm1      \n\t"   \
    192         "movd     %%mm1,     28(%%edi)  \n\t"   \
    193         "addl     $32,       %%edi      \n\t"   \
    194         "addl     $32,       %%esi      \n\t"   \
    195         "psrlq    $32,       %%mm1      \n\t"   \
    196         "movd     %%mm1,     %%ecx      \n\t"
    197 
    198 #define MULADDC_X8_STOP                 \
    199         "emms                   \n\t"   \
    200         "movl   %4, %%ebx       \n\t"   \
    201         "movl   %%ecx, %1       \n\t"   \
    202         "movl   %%edi, %2       \n\t"   \
    203         "movl   %%esi, %3       \n\t"   \
    204         : "=m" (t), "=m" (c), "=m" (d), "=m" (s)        \
    205         : "m" (t), "m" (s), "m" (d), "m" (c), "m" (b)   \
    206         : "eax", "ebx", "ecx", "edx", "esi", "edi"      \
    207     ); }
    208 
    209 #endif /* SSE2 */
    210 
    211 #endif /* i386 */
    212 
    213 #if defined(__amd64__) || defined (__x86_64__)
    214 
    215 #define MULADDC_X1_INIT                        \
    216     asm(                                    \
    217         "xorq   %%r8, %%r8\n"
    218 
    219 #define MULADDC_X1_CORE                        \
    220         "movq   (%%rsi), %%rax\n"           \
    221         "mulq   %%rbx\n"                    \
    222         "addq   $8, %%rsi\n"                \
    223         "addq   %%rcx, %%rax\n"             \
    224         "movq   %%r8, %%rcx\n"              \
    225         "adcq   $0, %%rdx\n"                \
    226         "nop    \n"                         \
    227         "addq   %%rax, (%%rdi)\n"           \
    228         "adcq   %%rdx, %%rcx\n"             \
    229         "addq   $8, %%rdi\n"
    230 
    231 #define MULADDC_X1_STOP                                              \
    232         : "+c" (c), "+D" (d), "+S" (s), "+m" (*(uint64_t (*)[16]) d) \
    233         : "b" (b), "m" (*(const uint64_t (*)[16]) s)                 \
    234         : "rax", "rdx", "r8"                                         \
    235     );
    236 
    237 #endif /* AMD64 */
    238 
    239 // The following assembly code assumes that a pointer will fit in a 64-bit register
    240 // (including ILP32 __aarch64__ ABIs such as on watchOS, hence the 2^32 - 1)
    241 #if defined(__aarch64__) && (UINTPTR_MAX == 0xfffffffful || UINTPTR_MAX == 0xfffffffffffffffful)
    242 
    243 /*
    244  * There are some issues around different compilers requiring different constraint
    245  * syntax for updating pointers from assembly code (see notes for
    246  * MBEDTLS_ASM_AARCH64_PTR_CONSTRAINT in common.h), especially on aarch64_32 (aka ILP32).
    247  *
    248  * For this reason we cast the pointers to/from uintptr_t here.
    249  */
    250 #define MULADDC_X1_INIT             \
    251     do { uintptr_t muladdc_d = (uintptr_t) d, muladdc_s = (uintptr_t) s; asm(
    252 
    253 #define MULADDC_X1_CORE             \
    254         "ldr x4, [%x2], #8  \n\t"   \
    255         "ldr x5, [%x1]      \n\t"   \
    256         "mul x6, x4, %4     \n\t"   \
    257         "umulh x7, x4, %4   \n\t"   \
    258         "adds x5, x5, x6    \n\t"   \
    259         "adc x7, x7, xzr    \n\t"   \
    260         "adds x5, x5, %0    \n\t"   \
    261         "adc %0, x7, xzr    \n\t"   \
    262         "str x5, [%x1], #8  \n\t"
    263 
    264 #define MULADDC_X1_STOP                                                 \
    265          : "+r" (c),                                                    \
    266            "+r" (muladdc_d),                                            \
    267            "+r" (muladdc_s),                                            \
    268            "+m" (*(uint64_t (*)[16]) d)                                 \
    269          : "r" (b), "m" (*(const uint64_t (*)[16]) s)                   \
    270          : "x4", "x5", "x6", "x7", "cc"                                 \
    271     ); d = (mbedtls_mpi_uint *)muladdc_d; s = (mbedtls_mpi_uint *)muladdc_s; } while (0);
    272 
    273 #endif /* Aarch64 */
    274 
    275 #if defined(__mc68020__) || defined(__mcpu32__)
    276 
    277 #define MULADDC_X1_INIT                 \
    278     asm(                                \
    279         "movl   %3, %%a2        \n\t"   \
    280         "movl   %4, %%a3        \n\t"   \
    281         "movl   %5, %%d3        \n\t"   \
    282         "movl   %6, %%d2        \n\t"   \
    283         "moveq  #0, %%d0        \n\t"
    284 
    285 #define MULADDC_X1_CORE                 \
    286         "movel  %%a2@+, %%d1    \n\t"   \
    287         "mulul  %%d2, %%d4:%%d1 \n\t"   \
    288         "addl   %%d3, %%d1      \n\t"   \
    289         "addxl  %%d0, %%d4      \n\t"   \
    290         "moveq  #0,   %%d3      \n\t"   \
    291         "addl   %%d1, %%a3@+    \n\t"   \
    292         "addxl  %%d4, %%d3      \n\t"
    293 
    294 #define MULADDC_X1_STOP                 \
    295         "movl   %%d3, %0        \n\t"   \
    296         "movl   %%a3, %1        \n\t"   \
    297         "movl   %%a2, %2        \n\t"   \
    298         : "=m" (c), "=m" (d), "=m" (s)              \
    299         : "m" (s), "m" (d), "m" (c), "m" (b)        \
    300         : "d0", "d1", "d2", "d3", "d4", "a2", "a3"  \
    301     );
    302 
    303 #define MULADDC_X8_INIT MULADDC_X1_INIT
    304 
    305 #define MULADDC_X8_CORE                     \
    306         "movel  %%a2@+,  %%d1       \n\t"   \
    307         "mulul  %%d2,    %%d4:%%d1  \n\t"   \
    308         "addxl  %%d3,    %%d1       \n\t"   \
    309         "addxl  %%d0,    %%d4       \n\t"   \
    310         "addl   %%d1,    %%a3@+     \n\t"   \
    311         "movel  %%a2@+,  %%d1       \n\t"   \
    312         "mulul  %%d2,    %%d3:%%d1  \n\t"   \
    313         "addxl  %%d4,    %%d1       \n\t"   \
    314         "addxl  %%d0,    %%d3       \n\t"   \
    315         "addl   %%d1,    %%a3@+     \n\t"   \
    316         "movel  %%a2@+,  %%d1       \n\t"   \
    317         "mulul  %%d2,    %%d4:%%d1  \n\t"   \
    318         "addxl  %%d3,    %%d1       \n\t"   \
    319         "addxl  %%d0,    %%d4       \n\t"   \
    320         "addl   %%d1,    %%a3@+     \n\t"   \
    321         "movel  %%a2@+,  %%d1       \n\t"   \
    322         "mulul  %%d2,    %%d3:%%d1  \n\t"   \
    323         "addxl  %%d4,    %%d1       \n\t"   \
    324         "addxl  %%d0,    %%d3       \n\t"   \
    325         "addl   %%d1,    %%a3@+     \n\t"   \
    326         "movel  %%a2@+,  %%d1       \n\t"   \
    327         "mulul  %%d2,    %%d4:%%d1  \n\t"   \
    328         "addxl  %%d3,    %%d1       \n\t"   \
    329         "addxl  %%d0,    %%d4       \n\t"   \
    330         "addl   %%d1,    %%a3@+     \n\t"   \
    331         "movel  %%a2@+,  %%d1       \n\t"   \
    332         "mulul  %%d2,    %%d3:%%d1  \n\t"   \
    333         "addxl  %%d4,    %%d1       \n\t"   \
    334         "addxl  %%d0,    %%d3       \n\t"   \
    335         "addl   %%d1,    %%a3@+     \n\t"   \
    336         "movel  %%a2@+,  %%d1       \n\t"   \
    337         "mulul  %%d2,    %%d4:%%d1  \n\t"   \
    338         "addxl  %%d3,    %%d1       \n\t"   \
    339         "addxl  %%d0,    %%d4       \n\t"   \
    340         "addl   %%d1,    %%a3@+     \n\t"   \
    341         "movel  %%a2@+,  %%d1       \n\t"   \
    342         "mulul  %%d2,    %%d3:%%d1  \n\t"   \
    343         "addxl  %%d4,    %%d1       \n\t"   \
    344         "addxl  %%d0,    %%d3       \n\t"   \
    345         "addl   %%d1,    %%a3@+     \n\t"   \
    346         "addxl  %%d0,    %%d3       \n\t"
    347 
    348 #define MULADDC_X8_STOP MULADDC_X1_STOP
    349 
    350 #endif /* MC68000 */
    351 
    352 #if defined(__powerpc64__) || defined(__ppc64__)
    353 
    354 #if defined(__MACH__) && defined(__APPLE__)
    355 
    356 #define MULADDC_X1_INIT                     \
    357     asm(                                    \
    358         "ld     r3, %3              \n\t"   \
    359         "ld     r4, %4              \n\t"   \
    360         "ld     r5, %5              \n\t"   \
    361         "ld     r6, %6              \n\t"   \
    362         "addi   r3, r3, -8          \n\t"   \
    363         "addi   r4, r4, -8          \n\t"   \
    364         "addic  r5, r5,  0          \n\t"
    365 
    366 #define MULADDC_X1_CORE                     \
    367         "ldu    r7, 8(r3)           \n\t"   \
    368         "mulld  r8, r7, r6          \n\t"   \
    369         "mulhdu r9, r7, r6          \n\t"   \
    370         "adde   r8, r8, r5          \n\t"   \
    371         "ld     r7, 8(r4)           \n\t"   \
    372         "addze  r5, r9              \n\t"   \
    373         "addc   r8, r8, r7          \n\t"   \
    374         "stdu   r8, 8(r4)           \n\t"
    375 
    376 #define MULADDC_X1_STOP                     \
    377         "addze  r5, r5              \n\t"   \
    378         "addi   r4, r4, 8           \n\t"   \
    379         "addi   r3, r3, 8           \n\t"   \
    380         "std    r5, %0              \n\t"   \
    381         "std    r4, %1              \n\t"   \
    382         "std    r3, %2              \n\t"   \
    383         : "=m" (c), "=m" (d), "=m" (s)              \
    384         : "m" (s), "m" (d), "m" (c), "m" (b)        \
    385         : "r3", "r4", "r5", "r6", "r7", "r8", "r9"  \
    386     );
    387 
    388 
    389 #else /* __MACH__ && __APPLE__ */
    390 
    391 #define MULADDC_X1_INIT                     \
    392     asm(                                    \
    393         "ld     %%r3, %3            \n\t"   \
    394         "ld     %%r4, %4            \n\t"   \
    395         "ld     %%r5, %5            \n\t"   \
    396         "ld     %%r6, %6            \n\t"   \
    397         "addi   %%r3, %%r3, -8      \n\t"   \
    398         "addi   %%r4, %%r4, -8      \n\t"   \
    399         "addic  %%r5, %%r5,  0      \n\t"
    400 
    401 #define MULADDC_X1_CORE                     \
    402         "ldu    %%r7, 8(%%r3)       \n\t"   \
    403         "mulld  %%r8, %%r7, %%r6    \n\t"   \
    404         "mulhdu %%r9, %%r7, %%r6    \n\t"   \
    405         "adde   %%r8, %%r8, %%r5    \n\t"   \
    406         "ld     %%r7, 8(%%r4)       \n\t"   \
    407         "addze  %%r5, %%r9          \n\t"   \
    408         "addc   %%r8, %%r8, %%r7    \n\t"   \
    409         "stdu   %%r8, 8(%%r4)       \n\t"
    410 
    411 #define MULADDC_X1_STOP                     \
    412         "addze  %%r5, %%r5          \n\t"   \
    413         "addi   %%r4, %%r4, 8       \n\t"   \
    414         "addi   %%r3, %%r3, 8       \n\t"   \
    415         "std    %%r5, %0            \n\t"   \
    416         "std    %%r4, %1            \n\t"   \
    417         "std    %%r3, %2            \n\t"   \
    418         : "=m" (c), "=m" (d), "=m" (s)              \
    419         : "m" (s), "m" (d), "m" (c), "m" (b)        \
    420         : "r3", "r4", "r5", "r6", "r7", "r8", "r9"  \
    421     );
    422 
    423 #endif /* __MACH__ && __APPLE__ */
    424 
    425 #elif defined(__powerpc__) || defined(__ppc__) /* end PPC64/begin PPC32  */
    426 
    427 #if defined(__MACH__) && defined(__APPLE__)
    428 
    429 #define MULADDC_X1_INIT                 \
    430     asm(                                \
    431         "lwz    r3, %3          \n\t"   \
    432         "lwz    r4, %4          \n\t"   \
    433         "lwz    r5, %5          \n\t"   \
    434         "lwz    r6, %6          \n\t"   \
    435         "addi   r3, r3, -4      \n\t"   \
    436         "addi   r4, r4, -4      \n\t"   \
    437         "addic  r5, r5,  0      \n\t"
    438 
    439 #define MULADDC_X1_CORE                 \
    440         "lwzu   r7, 4(r3)       \n\t"   \
    441         "mullw  r8, r7, r6      \n\t"   \
    442         "mulhwu r9, r7, r6      \n\t"   \
    443         "adde   r8, r8, r5      \n\t"   \
    444         "lwz    r7, 4(r4)       \n\t"   \
    445         "addze  r5, r9          \n\t"   \
    446         "addc   r8, r8, r7      \n\t"   \
    447         "stwu   r8, 4(r4)       \n\t"
    448 
    449 #define MULADDC_X1_STOP                 \
    450         "addze  r5, r5          \n\t"   \
    451         "addi   r4, r4, 4       \n\t"   \
    452         "addi   r3, r3, 4       \n\t"   \
    453         "stw    r5, %0          \n\t"   \
    454         "stw    r4, %1          \n\t"   \
    455         "stw    r3, %2          \n\t"   \
    456         : "=m" (c), "=m" (d), "=m" (s)              \
    457         : "m" (s), "m" (d), "m" (c), "m" (b)        \
    458         : "r3", "r4", "r5", "r6", "r7", "r8", "r9"  \
    459     );
    460 
    461 #else /* __MACH__ && __APPLE__ */
    462 
    463 #define MULADDC_X1_INIT                     \
    464     asm(                                    \
    465         "lwz    %%r3, %3            \n\t"   \
    466         "lwz    %%r4, %4            \n\t"   \
    467         "lwz    %%r5, %5            \n\t"   \
    468         "lwz    %%r6, %6            \n\t"   \
    469         "addi   %%r3, %%r3, -4      \n\t"   \
    470         "addi   %%r4, %%r4, -4      \n\t"   \
    471         "addic  %%r5, %%r5,  0      \n\t"
    472 
    473 #define MULADDC_X1_CORE                     \
    474         "lwzu   %%r7, 4(%%r3)       \n\t"   \
    475         "mullw  %%r8, %%r7, %%r6    \n\t"   \
    476         "mulhwu %%r9, %%r7, %%r6    \n\t"   \
    477         "adde   %%r8, %%r8, %%r5    \n\t"   \
    478         "lwz    %%r7, 4(%%r4)       \n\t"   \
    479         "addze  %%r5, %%r9          \n\t"   \
    480         "addc   %%r8, %%r8, %%r7    \n\t"   \
    481         "stwu   %%r8, 4(%%r4)       \n\t"
    482 
    483 #define MULADDC_X1_STOP                     \
    484         "addze  %%r5, %%r5          \n\t"   \
    485         "addi   %%r4, %%r4, 4       \n\t"   \
    486         "addi   %%r3, %%r3, 4       \n\t"   \
    487         "stw    %%r5, %0            \n\t"   \
    488         "stw    %%r4, %1            \n\t"   \
    489         "stw    %%r3, %2            \n\t"   \
    490         : "=m" (c), "=m" (d), "=m" (s)              \
    491         : "m" (s), "m" (d), "m" (c), "m" (b)        \
    492         : "r3", "r4", "r5", "r6", "r7", "r8", "r9"  \
    493     );
    494 
    495 #endif /* __MACH__ && __APPLE__ */
    496 
    497 #endif /* PPC32 */
    498 
    499 /*
    500  * The Sparc(64) assembly is reported to be broken.
    501  * Disable it for now, until we're able to fix it.
    502  */
    503 #if 0 && defined(__sparc__)
    504 #if defined(__sparc64__)
    505 
    506 #define MULADDC_X1_INIT                                 \
    507     asm(                                                \
    508                 "ldx     %3, %%o0               \n\t"   \
    509                 "ldx     %4, %%o1               \n\t"   \
    510                 "ld      %5, %%o2               \n\t"   \
    511                 "ld      %6, %%o3               \n\t"
    512 
    513 #define MULADDC_X1_CORE                                 \
    514                 "ld      [%%o0], %%o4           \n\t"   \
    515                 "inc     4, %%o0                \n\t"   \
    516                 "ld      [%%o1], %%o5           \n\t"   \
    517                 "umul    %%o3, %%o4, %%o4       \n\t"   \
    518                 "addcc   %%o4, %%o2, %%o4       \n\t"   \
    519                 "rd      %%y, %%g1              \n\t"   \
    520                 "addx    %%g1, 0, %%g1          \n\t"   \
    521                 "addcc   %%o4, %%o5, %%o4       \n\t"   \
    522                 "st      %%o4, [%%o1]           \n\t"   \
    523                 "addx    %%g1, 0, %%o2          \n\t"   \
    524                 "inc     4, %%o1                \n\t"
    525 
    526 #define MULADDC_X1_STOP                                 \
    527                 "st      %%o2, %0               \n\t"   \
    528                 "stx     %%o1, %1               \n\t"   \
    529                 "stx     %%o0, %2               \n\t"   \
    530         : "=m" (c), "=m" (d), "=m" (s)          \
    531         : "m" (s), "m" (d), "m" (c), "m" (b)    \
    532         : "g1", "o0", "o1", "o2", "o3", "o4",   \
    533           "o5"                                  \
    534         );
    535 
    536 #else /* __sparc64__ */
    537 
    538 #define MULADDC_X1_INIT                                 \
    539     asm(                                                \
    540                 "ld      %3, %%o0               \n\t"   \
    541                 "ld      %4, %%o1               \n\t"   \
    542                 "ld      %5, %%o2               \n\t"   \
    543                 "ld      %6, %%o3               \n\t"
    544 
    545 #define MULADDC_X1_CORE                                 \
    546                 "ld      [%%o0], %%o4           \n\t"   \
    547                 "inc     4, %%o0                \n\t"   \
    548                 "ld      [%%o1], %%o5           \n\t"   \
    549                 "umul    %%o3, %%o4, %%o4       \n\t"   \
    550                 "addcc   %%o4, %%o2, %%o4       \n\t"   \
    551                 "rd      %%y, %%g1              \n\t"   \
    552                 "addx    %%g1, 0, %%g1          \n\t"   \
    553                 "addcc   %%o4, %%o5, %%o4       \n\t"   \
    554                 "st      %%o4, [%%o1]           \n\t"   \
    555                 "addx    %%g1, 0, %%o2          \n\t"   \
    556                 "inc     4, %%o1                \n\t"
    557 
    558 #define MULADDC_X1_STOP                                 \
    559                 "st      %%o2, %0               \n\t"   \
    560                 "st      %%o1, %1               \n\t"   \
    561                 "st      %%o0, %2               \n\t"   \
    562         : "=m" (c), "=m" (d), "=m" (s)          \
    563         : "m" (s), "m" (d), "m" (c), "m" (b)    \
    564         : "g1", "o0", "o1", "o2", "o3", "o4",   \
    565           "o5"                                  \
    566         );
    567 
    568 #endif /* __sparc64__ */
    569 #endif /* __sparc__ */
    570 
    571 #if defined(__microblaze__) || defined(microblaze)
    572 
    573 #define MULADDC_X1_INIT                 \
    574     asm(                                \
    575         "lwi   r3,   %3         \n\t"   \
    576         "lwi   r4,   %4         \n\t"   \
    577         "lwi   r5,   %5         \n\t"   \
    578         "lwi   r6,   %6         \n\t"   \
    579         "andi  r7,   r6, 0xffff \n\t"   \
    580         "bsrli r6,   r6, 16     \n\t"
    581 
    582 #if (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
    583 #define MULADDC_LHUI                    \
    584         "lhui  r9,   r3,   0    \n\t"   \
    585         "addi  r3,   r3,   2    \n\t"   \
    586         "lhui  r8,   r3,   0    \n\t"
    587 #else
    588 #define MULADDC_LHUI                    \
    589         "lhui  r8,   r3,   0    \n\t"   \
    590         "addi  r3,   r3,   2    \n\t"   \
    591         "lhui  r9,   r3,   0    \n\t"
    592 #endif
    593 
    594 #define MULADDC_X1_CORE                    \
    595         MULADDC_LHUI                    \
    596         "addi  r3,   r3,   2    \n\t"   \
    597         "mul   r10,  r9,  r6    \n\t"   \
    598         "mul   r11,  r8,  r7    \n\t"   \
    599         "mul   r12,  r9,  r7    \n\t"   \
    600         "mul   r13,  r8,  r6    \n\t"   \
    601         "bsrli  r8, r10,  16    \n\t"   \
    602         "bsrli  r9, r11,  16    \n\t"   \
    603         "add   r13, r13,  r8    \n\t"   \
    604         "add   r13, r13,  r9    \n\t"   \
    605         "bslli r10, r10,  16    \n\t"   \
    606         "bslli r11, r11,  16    \n\t"   \
    607         "add   r12, r12, r10    \n\t"   \
    608         "addc  r13, r13,  r0    \n\t"   \
    609         "add   r12, r12, r11    \n\t"   \
    610         "addc  r13, r13,  r0    \n\t"   \
    611         "lwi   r10,  r4,   0    \n\t"   \
    612         "add   r12, r12, r10    \n\t"   \
    613         "addc  r13, r13,  r0    \n\t"   \
    614         "add   r12, r12,  r5    \n\t"   \
    615         "addc   r5, r13,  r0    \n\t"   \
    616         "swi   r12,  r4,   0    \n\t"   \
    617         "addi   r4,  r4,   4    \n\t"
    618 
    619 #define MULADDC_X1_STOP                 \
    620         "swi   r5,   %0         \n\t"   \
    621         "swi   r4,   %1         \n\t"   \
    622         "swi   r3,   %2         \n\t"   \
    623         : "=m" (c), "=m" (d), "=m" (s)              \
    624         : "m" (s), "m" (d), "m" (c), "m" (b)        \
    625         : "r3", "r4", "r5", "r6", "r7", "r8",       \
    626           "r9", "r10", "r11", "r12", "r13"          \
    627     );
    628 
    629 #endif /* MicroBlaze */
    630 
    631 #if defined(__tricore__)
    632 
    633 #define MULADDC_X1_INIT                         \
    634     asm(                                        \
    635         "ld.a   %%a2, %3                \n\t"   \
    636         "ld.a   %%a3, %4                \n\t"   \
    637         "ld.w   %%d4, %5                \n\t"   \
    638         "ld.w   %%d1, %6                \n\t"   \
    639         "xor    %%d5, %%d5              \n\t"
    640 
    641 #define MULADDC_X1_CORE                         \
    642         "ld.w   %%d0,   [%%a2+]         \n\t"   \
    643         "madd.u %%e2, %%e4, %%d0, %%d1  \n\t"   \
    644         "ld.w   %%d0,   [%%a3]          \n\t"   \
    645         "addx   %%d2,    %%d2,  %%d0    \n\t"   \
    646         "addc   %%d3,    %%d3,    0     \n\t"   \
    647         "mov    %%d4,    %%d3           \n\t"   \
    648         "st.w  [%%a3+],  %%d2           \n\t"
    649 
    650 #define MULADDC_X1_STOP                         \
    651         "st.w   %0, %%d4                \n\t"   \
    652         "st.a   %1, %%a3                \n\t"   \
    653         "st.a   %2, %%a2                \n\t"   \
    654         : "=m" (c), "=m" (d), "=m" (s)          \
    655         : "m" (s), "m" (d), "m" (c), "m" (b)    \
    656         : "d0", "d1", "e2", "d4", "a2", "a3"    \
    657     );
    658 
    659 #endif /* TriCore */
    660 
    661 #if defined(__arm__)
    662 
    663 #if defined(__thumb__) && !defined(__thumb2__)
    664 #if defined(MBEDTLS_COMPILER_IS_GCC)
    665 /*
    666  * Thumb 1 ISA. This code path has only been tested successfully on gcc;
    667  * it does not compile on clang or armclang.
    668  */
    669 
    670 #if !defined(__OPTIMIZE__) && defined(__GNUC__)
    671 /*
    672  * Note, gcc -O0 by default uses r7 for the frame pointer, so it complains about
    673  * our use of r7 below, unless -fomit-frame-pointer is passed.
    674  *
    675  * On the other hand, -fomit-frame-pointer is implied by any -Ox options with
    676  * x !=0, which we can detect using __OPTIMIZE__ (which is also defined by
    677  * clang and armcc5 under the same conditions).
    678  *
    679  * If gcc needs to use r7, we use r1 as a scratch register and have a few extra
    680  * instructions to preserve/restore it; otherwise, we can use r7 and avoid
    681  * the preserve/restore overhead.
    682  */
    683 #define MULADDC_SCRATCH              "RS .req r1         \n\t"
    684 #define MULADDC_PRESERVE_SCRATCH     "mov    r10, r1     \n\t"
    685 #define MULADDC_RESTORE_SCRATCH      "mov    r1, r10     \n\t"
    686 #define MULADDC_SCRATCH_CLOBBER      "r10"
    687 #else /* !defined(__OPTIMIZE__) && defined(__GNUC__) */
    688 #define MULADDC_SCRATCH              "RS .req r7         \n\t"
    689 #define MULADDC_PRESERVE_SCRATCH     ""
    690 #define MULADDC_RESTORE_SCRATCH      ""
    691 #define MULADDC_SCRATCH_CLOBBER      "r7"
    692 #endif /* !defined(__OPTIMIZE__) && defined(__GNUC__) */
    693 
    694 #define MULADDC_X1_INIT                                 \
    695     asm(                                                \
    696     MULADDC_SCRATCH                                     \
    697             "ldr    r0, %3                      \n\t"   \
    698             "ldr    r1, %4                      \n\t"   \
    699             "ldr    r2, %5                      \n\t"   \
    700             "ldr    r3, %6                      \n\t"   \
    701             "lsr    r4, r3, #16                 \n\t"   \
    702             "mov    r9, r4                      \n\t"   \
    703             "lsl    r4, r3, #16                 \n\t"   \
    704             "lsr    r4, r4, #16                 \n\t"   \
    705             "mov    r8, r4                      \n\t"   \
    706 
    707 
    708 #define MULADDC_X1_CORE                                 \
    709             MULADDC_PRESERVE_SCRATCH                    \
    710             "ldmia  r0!, {r6}                   \n\t"   \
    711             "lsr    RS, r6, #16                 \n\t"   \
    712             "lsl    r6, r6, #16                 \n\t"   \
    713             "lsr    r6, r6, #16                 \n\t"   \
    714             "mov    r4, r8                      \n\t"   \
    715             "mul    r4, r6                      \n\t"   \
    716             "mov    r3, r9                      \n\t"   \
    717             "mul    r6, r3                      \n\t"   \
    718             "mov    r5, r9                      \n\t"   \
    719             "mul    r5, RS                      \n\t"   \
    720             "mov    r3, r8                      \n\t"   \
    721             "mul    RS, r3                      \n\t"   \
    722             "lsr    r3, r6, #16                 \n\t"   \
    723             "add    r5, r5, r3                  \n\t"   \
    724             "lsr    r3, RS, #16                 \n\t"   \
    725             "add    r5, r5, r3                  \n\t"   \
    726             "add    r4, r4, r2                  \n\t"   \
    727             "mov    r2, #0                      \n\t"   \
    728             "adc    r5, r2                      \n\t"   \
    729             "lsl    r3, r6, #16                 \n\t"   \
    730             "add    r4, r4, r3                  \n\t"   \
    731             "adc    r5, r2                      \n\t"   \
    732             "lsl    r3, RS, #16                 \n\t"   \
    733             "add    r4, r4, r3                  \n\t"   \
    734             "adc    r5, r2                      \n\t"   \
    735             MULADDC_RESTORE_SCRATCH                     \
    736             "ldr    r3, [r1]                    \n\t"   \
    737             "add    r4, r4, r3                  \n\t"   \
    738             "adc    r2, r5                      \n\t"   \
    739             "stmia  r1!, {r4}                   \n\t"
    740 
    741 #define MULADDC_X1_STOP                                 \
    742             "str    r2, %0                      \n\t"   \
    743             "str    r1, %1                      \n\t"   \
    744             "str    r0, %2                      \n\t"   \
    745          : "=m" (c),  "=m" (d), "=m" (s)        \
    746          : "m" (s), "m" (d), "m" (c), "m" (b)   \
    747          : "r0", "r1", "r2", "r3", "r4", "r5",  \
    748            "r6", MULADDC_SCRATCH_CLOBBER, "r8", "r9", "cc" \
    749          );
    750 #endif /* defined(MBEDTLS_COMPILER_IS_GCC) */
    751 
    752 #elif (__ARM_ARCH >= 6) && \
    753     defined (__ARM_FEATURE_DSP) && (__ARM_FEATURE_DSP == 1)
    754 /* Armv6-M (or later) with DSP Instruction Set Extensions.
    755  * Requires support for either Thumb 2 or Arm ISA.
    756  */
    757 
    758 #define MULADDC_X1_INIT                            \
    759     {                                              \
    760         mbedtls_mpi_uint tmp_a, tmp_b;             \
    761         asm volatile (
    762 
    763 #define MULADDC_X1_CORE                                         \
    764            ".p2align  2                                 \n\t"   \
    765             "ldr      %[a], [%[in]], #4                 \n\t"   \
    766             "ldr      %[b], [%[acc]]                    \n\t"   \
    767             "umaal    %[b], %[carry], %[scalar], %[a]   \n\t"   \
    768             "str      %[b], [%[acc]], #4                \n\t"
    769 
    770 #define MULADDC_X1_STOP                                      \
    771             : [a]      "=&r" (tmp_a),                        \
    772               [b]      "=&r" (tmp_b),                        \
    773               [in]     "+r"  (s),                            \
    774               [acc]    "+r"  (d),                            \
    775               [carry]  "+l"  (c)                             \
    776             : [scalar] "r"   (b)                             \
    777             : "memory"                                       \
    778         );                                                   \
    779     }
    780 
    781 #define MULADDC_X2_INIT                              \
    782     {                                                \
    783         mbedtls_mpi_uint tmp_a0, tmp_b0;             \
    784         mbedtls_mpi_uint tmp_a1, tmp_b1;             \
    785         asm volatile (
    786 
    787             /* - Make sure loop is 4-byte aligned to avoid stalls
    788              *   upon repeated non-word aligned instructions in
    789              *   some microarchitectures.
    790              * - Don't use ldm with post-increment or back-to-back
    791              *   loads with post-increment and same address register
    792              *   to avoid stalls on some microarchitectures.
    793              * - Bunch loads and stores to reduce latency on some
    794              *   microarchitectures. E.g., on Cortex-M4, the first
    795              *   in a series of load/store operations has latency
    796              *   2 cycles, while subsequent loads/stores are single-cycle. */
    797 #define MULADDC_X2_CORE                                           \
    798            ".p2align  2                                   \n\t"   \
    799             "ldr      %[a0], [%[in]],  #+8                \n\t"   \
    800             "ldr      %[b0], [%[acc]], #+8                \n\t"   \
    801             "ldr      %[a1], [%[in],  #-4]                \n\t"   \
    802             "ldr      %[b1], [%[acc], #-4]                \n\t"   \
    803             "umaal    %[b0], %[carry], %[scalar], %[a0]   \n\t"   \
    804             "umaal    %[b1], %[carry], %[scalar], %[a1]   \n\t"   \
    805             "str      %[b0], [%[acc], #-8]                \n\t"   \
    806             "str      %[b1], [%[acc], #-4]                \n\t"
    807 
    808 #define MULADDC_X2_STOP                                      \
    809             : [a0]     "=&r" (tmp_a0),                       \
    810               [b0]     "=&r" (tmp_b0),                       \
    811               [a1]     "=&r" (tmp_a1),                       \
    812               [b1]     "=&r" (tmp_b1),                       \
    813               [in]     "+r"  (s),                            \
    814               [acc]    "+r"  (d),                            \
    815               [carry]  "+l"  (c)                             \
    816             : [scalar] "r"   (b)                             \
    817             : "memory"                                       \
    818         );                                                   \
    819     }
    820 
    821 #else /* Thumb 2 or Arm ISA, without DSP extensions */
    822 
    823 #define MULADDC_X1_INIT                                 \
    824     asm(                                                \
    825             "ldr    r0, %3                      \n\t"   \
    826             "ldr    r1, %4                      \n\t"   \
    827             "ldr    r2, %5                      \n\t"   \
    828             "ldr    r3, %6                      \n\t"
    829 
    830 #define MULADDC_X1_CORE                                 \
    831             "ldr    r4, [r0], #4                \n\t"   \
    832             "mov    r5, #0                      \n\t"   \
    833             "ldr    r6, [r1]                    \n\t"   \
    834             "umlal  r2, r5, r3, r4              \n\t"   \
    835             "adds   r4, r6, r2                  \n\t"   \
    836             "adc    r2, r5, #0                  \n\t"   \
    837             "str    r4, [r1], #4                \n\t"
    838 
    839 #define MULADDC_X1_STOP                                 \
    840             "str    r2, %0                      \n\t"   \
    841             "str    r1, %1                      \n\t"   \
    842             "str    r0, %2                      \n\t"   \
    843          : "=m" (c),  "=m" (d), "=m" (s)        \
    844          : "m" (s), "m" (d), "m" (c), "m" (b)   \
    845          : "r0", "r1", "r2", "r3", "r4", "r5",  \
    846            "r6", "cc"                     \
    847          );
    848 
    849 #endif /* ISA codepath selection */
    850 
    851 #endif /* defined(__arm__) */
    852 
    853 #if defined(__alpha__)
    854 
    855 #define MULADDC_X1_INIT                 \
    856     asm(                                \
    857         "ldq    $1, %3          \n\t"   \
    858         "ldq    $2, %4          \n\t"   \
    859         "ldq    $3, %5          \n\t"   \
    860         "ldq    $4, %6          \n\t"
    861 
    862 #define MULADDC_X1_CORE                 \
    863         "ldq    $6,  0($1)      \n\t"   \
    864         "addq   $1,  8, $1      \n\t"   \
    865         "mulq   $6, $4, $7      \n\t"   \
    866         "umulh  $6, $4, $6      \n\t"   \
    867         "addq   $7, $3, $7      \n\t"   \
    868         "cmpult $7, $3, $3      \n\t"   \
    869         "ldq    $5,  0($2)      \n\t"   \
    870         "addq   $7, $5, $7      \n\t"   \
    871         "cmpult $7, $5, $5      \n\t"   \
    872         "stq    $7,  0($2)      \n\t"   \
    873         "addq   $2,  8, $2      \n\t"   \
    874         "addq   $6, $3, $3      \n\t"   \
    875         "addq   $5, $3, $3      \n\t"
    876 
    877 #define MULADDC_X1_STOP                 \
    878         "stq    $3, %0          \n\t"   \
    879         "stq    $2, %1          \n\t"   \
    880         "stq    $1, %2          \n\t"   \
    881         : "=m" (c), "=m" (d), "=m" (s)              \
    882         : "m" (s), "m" (d), "m" (c), "m" (b)        \
    883         : "$1", "$2", "$3", "$4", "$5", "$6", "$7"  \
    884     );
    885 #endif /* Alpha */
    886 
    887 #if defined(__mips__) && !defined(__mips64)
    888 
    889 #define MULADDC_X1_INIT                 \
    890     asm(                                \
    891         "lw     $10, %3         \n\t"   \
    892         "lw     $11, %4         \n\t"   \
    893         "lw     $12, %5         \n\t"   \
    894         "lw     $13, %6         \n\t"
    895 
    896 #define MULADDC_X1_CORE                 \
    897         "lw     $14, 0($10)     \n\t"   \
    898         "multu  $13, $14        \n\t"   \
    899         "addi   $10, $10, 4     \n\t"   \
    900         "mflo   $14             \n\t"   \
    901         "mfhi   $9              \n\t"   \
    902         "addu   $14, $12, $14   \n\t"   \
    903         "lw     $15, 0($11)     \n\t"   \
    904         "sltu   $12, $14, $12   \n\t"   \
    905         "addu   $15, $14, $15   \n\t"   \
    906         "sltu   $14, $15, $14   \n\t"   \
    907         "addu   $12, $12, $9    \n\t"   \
    908         "sw     $15, 0($11)     \n\t"   \
    909         "addu   $12, $12, $14   \n\t"   \
    910         "addi   $11, $11, 4     \n\t"
    911 
    912 #define MULADDC_X1_STOP                 \
    913         "sw     $12, %0         \n\t"   \
    914         "sw     $11, %1         \n\t"   \
    915         "sw     $10, %2         \n\t"   \
    916         : "=m" (c), "=m" (d), "=m" (s)                      \
    917         : "m" (s), "m" (d), "m" (c), "m" (b)                \
    918         : "$9", "$10", "$11", "$12", "$13", "$14", "$15", "lo", "hi" \
    919     );
    920 
    921 #endif /* MIPS */
    922 #endif /* GNUC */
    923 
    924 #if (defined(_MSC_VER) && defined(_M_IX86)) || defined(__WATCOMC__)
    925 
    926 #define MULADDC_X1_INIT                         \
    927     __asm   mov     esi, s                      \
    928     __asm   mov     edi, d                      \
    929     __asm   mov     ecx, c                      \
    930     __asm   mov     ebx, b
    931 
    932 #define MULADDC_X1_CORE                         \
    933     __asm   lodsd                               \
    934     __asm   mul     ebx                         \
    935     __asm   add     eax, ecx                    \
    936     __asm   adc     edx, 0                      \
    937     __asm   add     eax, [edi]                  \
    938     __asm   adc     edx, 0                      \
    939     __asm   mov     ecx, edx                    \
    940     __asm   stosd
    941 
    942 #define MULADDC_X1_STOP                         \
    943     __asm   mov     c, ecx                      \
    944     __asm   mov     d, edi                      \
    945     __asm   mov     s, esi
    946 
    947 #if defined(MBEDTLS_HAVE_SSE2)
    948 
    949 #define EMIT __asm _emit
    950 
    951 #define MULADDC_X8_INIT MULADDC_X1_INIT
    952 
    953 #define MULADDC_X8_CORE                         \
    954     EMIT 0x0F  EMIT 0x6E  EMIT 0xC9             \
    955     EMIT 0x0F  EMIT 0x6E  EMIT 0xC3             \
    956     EMIT 0x0F  EMIT 0x6E  EMIT 0x1F             \
    957     EMIT 0x0F  EMIT 0xD4  EMIT 0xCB             \
    958     EMIT 0x0F  EMIT 0x6E  EMIT 0x16             \
    959     EMIT 0x0F  EMIT 0xF4  EMIT 0xD0             \
    960     EMIT 0x0F  EMIT 0x6E  EMIT 0x66  EMIT 0x04  \
    961     EMIT 0x0F  EMIT 0xF4  EMIT 0xE0             \
    962     EMIT 0x0F  EMIT 0x6E  EMIT 0x76  EMIT 0x08  \
    963     EMIT 0x0F  EMIT 0xF4  EMIT 0xF0             \
    964     EMIT 0x0F  EMIT 0x6E  EMIT 0x7E  EMIT 0x0C  \
    965     EMIT 0x0F  EMIT 0xF4  EMIT 0xF8             \
    966     EMIT 0x0F  EMIT 0xD4  EMIT 0xCA             \
    967     EMIT 0x0F  EMIT 0x6E  EMIT 0x5F  EMIT 0x04  \
    968     EMIT 0x0F  EMIT 0xD4  EMIT 0xDC             \
    969     EMIT 0x0F  EMIT 0x6E  EMIT 0x6F  EMIT 0x08  \
    970     EMIT 0x0F  EMIT 0xD4  EMIT 0xEE             \
    971     EMIT 0x0F  EMIT 0x6E  EMIT 0x67  EMIT 0x0C  \
    972     EMIT 0x0F  EMIT 0xD4  EMIT 0xFC             \
    973     EMIT 0x0F  EMIT 0x7E  EMIT 0x0F             \
    974     EMIT 0x0F  EMIT 0x6E  EMIT 0x56  EMIT 0x10  \
    975     EMIT 0x0F  EMIT 0xF4  EMIT 0xD0             \
    976     EMIT 0x0F  EMIT 0x73  EMIT 0xD1  EMIT 0x20  \
    977     EMIT 0x0F  EMIT 0x6E  EMIT 0x66  EMIT 0x14  \
    978     EMIT 0x0F  EMIT 0xF4  EMIT 0xE0             \
    979     EMIT 0x0F  EMIT 0xD4  EMIT 0xCB             \
    980     EMIT 0x0F  EMIT 0x6E  EMIT 0x76  EMIT 0x18  \
    981     EMIT 0x0F  EMIT 0xF4  EMIT 0xF0             \
    982     EMIT 0x0F  EMIT 0x7E  EMIT 0x4F  EMIT 0x04  \
    983     EMIT 0x0F  EMIT 0x73  EMIT 0xD1  EMIT 0x20  \
    984     EMIT 0x0F  EMIT 0x6E  EMIT 0x5E  EMIT 0x1C  \
    985     EMIT 0x0F  EMIT 0xF4  EMIT 0xD8             \
    986     EMIT 0x0F  EMIT 0xD4  EMIT 0xCD             \
    987     EMIT 0x0F  EMIT 0x6E  EMIT 0x6F  EMIT 0x10  \
    988     EMIT 0x0F  EMIT 0xD4  EMIT 0xD5             \
    989     EMIT 0x0F  EMIT 0x7E  EMIT 0x4F  EMIT 0x08  \
    990     EMIT 0x0F  EMIT 0x73  EMIT 0xD1  EMIT 0x20  \
    991     EMIT 0x0F  EMIT 0xD4  EMIT 0xCF             \
    992     EMIT 0x0F  EMIT 0x6E  EMIT 0x6F  EMIT 0x14  \
    993     EMIT 0x0F  EMIT 0xD4  EMIT 0xE5             \
    994     EMIT 0x0F  EMIT 0x7E  EMIT 0x4F  EMIT 0x0C  \
    995     EMIT 0x0F  EMIT 0x73  EMIT 0xD1  EMIT 0x20  \
    996     EMIT 0x0F  EMIT 0xD4  EMIT 0xCA             \
    997     EMIT 0x0F  EMIT 0x6E  EMIT 0x6F  EMIT 0x18  \
    998     EMIT 0x0F  EMIT 0xD4  EMIT 0xF5             \
    999     EMIT 0x0F  EMIT 0x7E  EMIT 0x4F  EMIT 0x10  \
   1000     EMIT 0x0F  EMIT 0x73  EMIT 0xD1  EMIT 0x20  \
   1001     EMIT 0x0F  EMIT 0xD4  EMIT 0xCC             \
   1002     EMIT 0x0F  EMIT 0x6E  EMIT 0x6F  EMIT 0x1C  \
   1003     EMIT 0x0F  EMIT 0xD4  EMIT 0xDD             \
   1004     EMIT 0x0F  EMIT 0x7E  EMIT 0x4F  EMIT 0x14  \
   1005     EMIT 0x0F  EMIT 0x73  EMIT 0xD1  EMIT 0x20  \
   1006     EMIT 0x0F  EMIT 0xD4  EMIT 0xCE             \
   1007     EMIT 0x0F  EMIT 0x7E  EMIT 0x4F  EMIT 0x18  \
   1008     EMIT 0x0F  EMIT 0x73  EMIT 0xD1  EMIT 0x20  \
   1009     EMIT 0x0F  EMIT 0xD4  EMIT 0xCB             \
   1010     EMIT 0x0F  EMIT 0x7E  EMIT 0x4F  EMIT 0x1C  \
   1011     EMIT 0x83  EMIT 0xC7  EMIT 0x20             \
   1012     EMIT 0x83  EMIT 0xC6  EMIT 0x20             \
   1013     EMIT 0x0F  EMIT 0x73  EMIT 0xD1  EMIT 0x20  \
   1014     EMIT 0x0F  EMIT 0x7E  EMIT 0xC9
   1015 
   1016 #define MULADDC_X8_STOP                         \
   1017     EMIT 0x0F  EMIT 0x77                        \
   1018     __asm   mov     c, ecx                      \
   1019     __asm   mov     d, edi                      \
   1020     __asm   mov     s, esi
   1021 
   1022 #endif /* SSE2 */
   1023 #endif /* MSVC */
   1024 
   1025 #endif /* MBEDTLS_HAVE_ASM */
   1026 
   1027 #if !defined(MULADDC_X1_CORE)
   1028 #if defined(MBEDTLS_HAVE_UDBL)
   1029 
   1030 #define MULADDC_X1_INIT                 \
   1031 {                                       \
   1032     mbedtls_t_udbl r;                           \
   1033     mbedtls_mpi_uint r0, r1;
   1034 
   1035 #define MULADDC_X1_CORE                 \
   1036     r   = *(s++) * (mbedtls_t_udbl) b;          \
   1037     r0  = (mbedtls_mpi_uint) r;                   \
   1038     r1  = (mbedtls_mpi_uint)( r >> biL );         \
   1039     r0 += c;  r1 += (r0 <  c);          \
   1040     r0 += *d; r1 += (r0 < *d);          \
   1041     c = r1; *(d++) = r0;
   1042 
   1043 #define MULADDC_X1_STOP                 \
   1044 }
   1045 
   1046 #else /* MBEDTLS_HAVE_UDBL */
   1047 
   1048 #define MULADDC_X1_INIT                 \
   1049 {                                       \
   1050     mbedtls_mpi_uint s0, s1, b0, b1;              \
   1051     mbedtls_mpi_uint r0, r1, rx, ry;              \
   1052     b0 = ( b << biH ) >> biH;           \
   1053     b1 = ( b >> biH );
   1054 
   1055 #define MULADDC_X1_CORE                 \
   1056     s0 = ( *s << biH ) >> biH;          \
   1057     s1 = ( *s >> biH ); s++;            \
   1058     rx = s0 * b1; r0 = s0 * b0;         \
   1059     ry = s1 * b0; r1 = s1 * b1;         \
   1060     r1 += ( rx >> biH );                \
   1061     r1 += ( ry >> biH );                \
   1062     rx <<= biH; ry <<= biH;             \
   1063     r0 += rx; r1 += (r0 < rx);          \
   1064     r0 += ry; r1 += (r0 < ry);          \
   1065     r0 +=  c; r1 += (r0 <  c);          \
   1066     r0 += *d; r1 += (r0 < *d);          \
   1067     c = r1; *(d++) = r0;
   1068 
   1069 #define MULADDC_X1_STOP                 \
   1070 }
   1071 
   1072 #endif /* C (longlong) */
   1073 #endif /* C (generic)  */
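
The generic path above uses the classic half-limb schoolbook trick: each limb is split into halves of biH bits, the four partial products are combined, and carries are recovered with unsigned comparisons, since a full product plus two limbs and a carry always fits in a double-width result. A standalone sketch of the same technique for fixed 32-bit limbs (the function name and shape are illustrative, not part of mbed TLS):

    #include <stdint.h>

    /* Compute d + s * b + *carry for one 32-bit limb using only 32-bit
     * arithmetic. Returns the low limb; the high limb becomes the new carry. */
    static uint32_t example_muladd32(uint32_t s, uint32_t b, uint32_t d,
                                     uint32_t *carry)
    {
        uint32_t s0 = s & 0xFFFF, s1 = s >> 16;   /* split into 16-bit halves */
        uint32_t b0 = b & 0xFFFF, b1 = b >> 16;
        uint32_t r0 = s0 * b0,    r1 = s1 * b1;   /* low and high partials    */
        uint32_t rx = s0 * b1,    ry = s1 * b0;   /* cross products           */

        r1 += rx >> 16;                           /* high halves of the cross */
        r1 += ry >> 16;                           /* products reach the top   */
        rx <<= 16; ry <<= 16;                     /* low halves, shifted      */
        r0 += rx; r1 += (r0 < rx);                /* add, detecting carry     */
        r0 += ry; r1 += (r0 < ry);
        r0 += *carry; r1 += (r0 < *carry);        /* incoming carry           */
        r0 += d;  r1 += (r0 < d);                 /* accumulate into d        */

        *carry = r1;
        return r0;
    }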
   1074 
   1075 #if !defined(MULADDC_X2_CORE)
   1076 #define MULADDC_X2_INIT MULADDC_X1_INIT
   1077 #define MULADDC_X2_STOP MULADDC_X1_STOP
   1078 #define MULADDC_X2_CORE MULADDC_X1_CORE MULADDC_X1_CORE
   1079 #endif /* MULADDC_X2_CORE */
   1080 
   1081 #if !defined(MULADDC_X4_CORE)
   1082 #define MULADDC_X4_INIT MULADDC_X2_INIT
   1083 #define MULADDC_X4_STOP MULADDC_X2_STOP
   1084 #define MULADDC_X4_CORE MULADDC_X2_CORE MULADDC_X2_CORE
   1085 #endif /* MULADDC_X4_CORE */
   1086 
   1087 #if !defined(MULADDC_X8_CORE)
   1088 #define MULADDC_X8_INIT MULADDC_X4_INIT
   1089 #define MULADDC_X8_STOP MULADDC_X4_STOP
   1090 #define MULADDC_X8_CORE MULADDC_X4_CORE MULADDC_X4_CORE
   1091 #endif /* MULADDC_X8_CORE */
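
The INIT/CORE/STOP triples above are designed to be expanded back to back inside a loop: each triple forms one self-contained block (or one extended asm statement) that consumes limbs from s, accumulates into d, threads the carry through c, and advances s and d as a side effect. A hedged sketch of a driver, loosely modelled on the mpi_mul_hlp() helper in bignum.c (the name and exact shape are illustrative and vary between mbed TLS versions):

    /* Multiply n limbs of s by the scalar b and add the result into d,
     * returning the final carry. The macros advance s and d themselves,
     * so the loops only count n down. */
    static mbedtls_mpi_uint example_mul_hlp(size_t n, mbedtls_mpi_uint *s,
                                            mbedtls_mpi_uint *d, mbedtls_mpi_uint b)
    {
        mbedtls_mpi_uint c = 0;      /* carry threaded through every block */

        for (; n >= 8; n -= 8) {     /* unrolled-by-8 fast path            */
            MULADDC_X8_INIT
            MULADDC_X8_CORE
            MULADDC_X8_STOP
        }
        for (; n > 0; n--) {         /* one limb at a time for the tail    */
            MULADDC_X1_INIT
            MULADDC_X1_CORE
            MULADDC_X1_STOP
        }
        return c;                    /* caller propagates this into the higher limbs of d */
    }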
   1092 
   1093 /* *INDENT-ON* */
   1094 #endif /* bn_mul.h */