diff options
Diffstat (limited to 'src/Specific/montgomery32_2e174m17')
-rw-r--r-- | src/Specific/montgomery32_2e174m17/feadd.c | 86 | ||||
-rw-r--r-- | src/Specific/montgomery32_2e174m17/femul.c | 538 | ||||
-rw-r--r-- | src/Specific/montgomery32_2e174m17/fenz.c | 42 | ||||
-rw-r--r-- | src/Specific/montgomery32_2e174m17/feopp.c | 80 | ||||
-rw-r--r-- | src/Specific/montgomery32_2e174m17/fesub.c | 86 |
5 files changed, 395 insertions, 437 deletions
diff --git a/src/Specific/montgomery32_2e174m17/feadd.c b/src/Specific/montgomery32_2e174m17/feadd.c index 0190a5e63..2c1967002 100644 --- a/src/Specific/montgomery32_2e174m17/feadd.c +++ b/src/Specific/montgomery32_2e174m17/feadd.c @@ -1,46 +1,40 @@ -#include <stdint.h> -#include <stdbool.h> -#include <x86intrin.h> -#include "liblow.h" - -#include "feadd.h" - -typedef unsigned int uint128_t __attribute__((mode(TI))); - -#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER)) -// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294 -#define _subborrow_u32 __builtin_ia32_sbb_u32 -#define _subborrow_u64 __builtin_ia32_sbb_u64 -#endif - -#undef force_inline -#define force_inline __attribute__((always_inline)) - -void force_inline feadd(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15) -{ uint32_t x25; uint8_t x26 = _addcarryx_u32(0x0, x5, x15, &x25); -{ uint32_t x28; uint8_t x29 = _addcarryx_u32(x26, x7, x17, &x28); -{ uint32_t x31; uint8_t x32 = _addcarryx_u32(x29, x9, x19, &x31); -{ uint32_t x34; uint8_t x35 = _addcarryx_u32(x32, x11, x21, &x34); -{ uint32_t x37; uint8_t x38 = _addcarryx_u32(x35, x13, x23, &x37); -{ uint32_t x40; uint8_t x41 = _addcarryx_u32(x38, x12, x22, &x40); -{ uint32_t x43; uint8_t x44 = _subborrow_u32(0x0, x25, 0xffffffef, &x43); -{ uint32_t x46; uint8_t x47 = _subborrow_u32(x44, x28, 0xffffffff, &x46); -{ uint32_t x49; uint8_t x50 = _subborrow_u32(x47, x31, 0xffffffff, &x49); -{ uint32_t x52; uint8_t x53 = _subborrow_u32(x50, x34, 0xffffffff, &x52); -{ uint32_t x55; uint8_t x56 = _subborrow_u32(x53, x37, 0xffffffff, &x55); -{ uint32_t x58; uint8_t x59 = _subborrow_u32(x56, x40, 0x3fff, &x58); -{ uint32_t _; uint8_t x62 = _subborrow_u32(x59, x41, 0x0, &_); -{ uint32_t x63 = cmovznz(x62, x58, x40); -{ uint32_t x64 = cmovznz(x62, x55, x37); -{ uint32_t x65 = cmovznz(x62, x52, x34); -{ uint32_t x66 = cmovznz(x62, x49, x31); -{ uint32_t x67 = cmovznz(x62, x46, x28); -{ uint32_t x68 = cmovznz(x62, x43, x25); -out[0] = x63; -out[1] = x64; -out[2] = x65; -out[3] = x66; -out[4] = x67; -out[5] = x68; -}}}}}}}}}}}}}}}}}}} -// caller: uint64_t out[6]; +static void feadd(uint32_t out[6], const uint32_t in1[6], const uint32_t in2[6]) { + { const uint32_t x12 = in1[5]; + { const uint32_t x13 = in1[4]; + { const uint32_t x11 = in1[3]; + { const uint32_t x9 = in1[2]; + { const uint32_t x7 = in1[1]; + { const uint32_t x5 = in1[0]; + { const uint32_t x22 = in2[5]; + { const uint32_t x23 = in2[4]; + { const uint32_t x21 = in2[3]; + { const uint32_t x19 = in2[2]; + { const uint32_t x17 = in2[1]; + { const uint32_t x15 = in2[0]; + { uint32_t x25; uint8_t x26 = _addcarryx_u32(0x0, x5, x15, &x25); + { uint32_t x28; uint8_t x29 = _addcarryx_u32(x26, x7, x17, &x28); + { uint32_t x31; uint8_t x32 = _addcarryx_u32(x29, x9, x19, &x31); + { uint32_t x34; uint8_t x35 = _addcarryx_u32(x32, x11, x21, &x34); + { uint32_t x37; uint8_t x38 = _addcarryx_u32(x35, x13, x23, &x37); + { uint32_t x40; uint8_t x41 = _addcarryx_u32(x38, x12, x22, &x40); + { uint32_t x43; uint8_t x44 = _subborrow_u32(0x0, x25, 0xffffffef, &x43); + { uint32_t x46; uint8_t x47 = _subborrow_u32(x44, x28, 0xffffffff, &x46); + { uint32_t x49; uint8_t x50 = _subborrow_u32(x47, x31, 0xffffffff, &x49); + { uint32_t x52; uint8_t x53 = _subborrow_u32(x50, x34, 0xffffffff, &x52); + { uint32_t x55; uint8_t x56 = _subborrow_u32(x53, x37, 0xffffffff, &x55); + { uint32_t x58; uint8_t x59 = _subborrow_u32(x56, x40, 0x3fff, &x58); + { uint32_t _; uint8_t x62 = _subborrow_u32(x59, x41, 0x0, &_); + { uint32_t x63 = cmovznz(x62, x58, x40); + { uint32_t x64 = cmovznz(x62, x55, x37); + { uint32_t x65 = cmovznz(x62, x52, x34); + { uint32_t x66 = cmovznz(x62, x49, x31); + { uint32_t x67 = cmovznz(x62, x46, x28); + { uint32_t x68 = cmovznz(x62, x43, x25); + out[0] = x68; + out[1] = x67; + out[2] = x66; + out[3] = x65; + out[4] = x64; + out[5] = x63; + }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}} +} diff --git a/src/Specific/montgomery32_2e174m17/femul.c b/src/Specific/montgomery32_2e174m17/femul.c index 7b1f772c8..e713bda8f 100644 --- a/src/Specific/montgomery32_2e174m17/femul.c +++ b/src/Specific/montgomery32_2e174m17/femul.c @@ -1,272 +1,266 @@ -#include <stdint.h> -#include <stdbool.h> -#include <x86intrin.h> -#include "liblow.h" - -#include "femul.h" - -typedef unsigned int uint128_t __attribute__((mode(TI))); - -#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER)) -// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294 -#define _subborrow_u32 __builtin_ia32_sbb_u32 -#define _subborrow_u64 __builtin_ia32_sbb_u64 -#endif - -#undef force_inline -#define force_inline __attribute__((always_inline)) - -void force_inline femul(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15) -{ uint32_t x26; uint32_t x25 = _mulx_u32(x5, x15, &x26); -{ uint32_t x29; uint32_t x28 = _mulx_u32(x5, x17, &x29); -{ uint32_t x32; uint32_t x31 = _mulx_u32(x5, x19, &x32); -{ uint32_t x35; uint32_t x34 = _mulx_u32(x5, x21, &x35); -{ uint32_t x38; uint32_t x37 = _mulx_u32(x5, x23, &x38); -{ uint32_t x41; uint32_t x40 = _mulx_u32(x5, x22, &x41); -{ uint32_t x43; uint8_t x44 = _addcarryx_u32(0x0, x26, x28, &x43); -{ uint32_t x46; uint8_t x47 = _addcarryx_u32(x44, x29, x31, &x46); -{ uint32_t x49; uint8_t x50 = _addcarryx_u32(x47, x32, x34, &x49); -{ uint32_t x52; uint8_t x53 = _addcarryx_u32(x50, x35, x37, &x52); -{ uint32_t x55; uint8_t x56 = _addcarryx_u32(x53, x38, x40, &x55); -{ uint32_t x58; uint8_t _ = _addcarryx_u32(0x0, x56, x41, &x58); -{ uint32_t _; uint32_t x61 = _mulx_u32(x25, 0xf0f0f0f1, &_); -{ uint32_t x65; uint32_t x64 = _mulx_u32(x61, 0xffffffef, &x65); -{ uint32_t x68; uint32_t x67 = _mulx_u32(x61, 0xffffffff, &x68); -{ uint32_t x71; uint32_t x70 = _mulx_u32(x61, 0xffffffff, &x71); -{ uint32_t x74; uint32_t x73 = _mulx_u32(x61, 0xffffffff, &x74); -{ uint32_t x77; uint32_t x76 = _mulx_u32(x61, 0xffffffff, &x77); -{ uint32_t x80; uint32_t x79 = _mulx_u32(x61, 0x3fff, &x80); -{ uint32_t x82; uint8_t x83 = _addcarryx_u32(0x0, x65, x67, &x82); -{ uint32_t x85; uint8_t x86 = _addcarryx_u32(x83, x68, x70, &x85); -{ uint32_t x88; uint8_t x89 = _addcarryx_u32(x86, x71, x73, &x88); -{ uint32_t x91; uint8_t x92 = _addcarryx_u32(x89, x74, x76, &x91); -{ uint32_t x94; uint8_t x95 = _addcarryx_u32(x92, x77, x79, &x94); -{ uint32_t x97; uint8_t _ = _addcarryx_u32(0x0, x95, x80, &x97); -{ uint32_t _; uint8_t x101 = _addcarryx_u32(0x0, x25, x64, &_); -{ uint32_t x103; uint8_t x104 = _addcarryx_u32(x101, x43, x82, &x103); -{ uint32_t x106; uint8_t x107 = _addcarryx_u32(x104, x46, x85, &x106); -{ uint32_t x109; uint8_t x110 = _addcarryx_u32(x107, x49, x88, &x109); -{ uint32_t x112; uint8_t x113 = _addcarryx_u32(x110, x52, x91, &x112); -{ uint32_t x115; uint8_t x116 = _addcarryx_u32(x113, x55, x94, &x115); -{ uint32_t x118; uint8_t x119 = _addcarryx_u32(x116, x58, x97, &x118); -{ uint32_t x122; uint32_t x121 = _mulx_u32(x7, x15, &x122); -{ uint32_t x125; uint32_t x124 = _mulx_u32(x7, x17, &x125); -{ uint32_t x128; uint32_t x127 = _mulx_u32(x7, x19, &x128); -{ uint32_t x131; uint32_t x130 = _mulx_u32(x7, x21, &x131); -{ uint32_t x134; uint32_t x133 = _mulx_u32(x7, x23, &x134); -{ uint32_t x137; uint32_t x136 = _mulx_u32(x7, x22, &x137); -{ uint32_t x139; uint8_t x140 = _addcarryx_u32(0x0, x122, x124, &x139); -{ uint32_t x142; uint8_t x143 = _addcarryx_u32(x140, x125, x127, &x142); -{ uint32_t x145; uint8_t x146 = _addcarryx_u32(x143, x128, x130, &x145); -{ uint32_t x148; uint8_t x149 = _addcarryx_u32(x146, x131, x133, &x148); -{ uint32_t x151; uint8_t x152 = _addcarryx_u32(x149, x134, x136, &x151); -{ uint32_t x154; uint8_t _ = _addcarryx_u32(0x0, x152, x137, &x154); -{ uint32_t x157; uint8_t x158 = _addcarryx_u32(0x0, x103, x121, &x157); -{ uint32_t x160; uint8_t x161 = _addcarryx_u32(x158, x106, x139, &x160); -{ uint32_t x163; uint8_t x164 = _addcarryx_u32(x161, x109, x142, &x163); -{ uint32_t x166; uint8_t x167 = _addcarryx_u32(x164, x112, x145, &x166); -{ uint32_t x169; uint8_t x170 = _addcarryx_u32(x167, x115, x148, &x169); -{ uint32_t x172; uint8_t x173 = _addcarryx_u32(x170, x118, x151, &x172); -{ uint32_t x175; uint8_t x176 = _addcarryx_u32(x173, x119, x154, &x175); -{ uint32_t _; uint32_t x178 = _mulx_u32(x157, 0xf0f0f0f1, &_); -{ uint32_t x182; uint32_t x181 = _mulx_u32(x178, 0xffffffef, &x182); -{ uint32_t x185; uint32_t x184 = _mulx_u32(x178, 0xffffffff, &x185); -{ uint32_t x188; uint32_t x187 = _mulx_u32(x178, 0xffffffff, &x188); -{ uint32_t x191; uint32_t x190 = _mulx_u32(x178, 0xffffffff, &x191); -{ uint32_t x194; uint32_t x193 = _mulx_u32(x178, 0xffffffff, &x194); -{ uint32_t x197; uint32_t x196 = _mulx_u32(x178, 0x3fff, &x197); -{ uint32_t x199; uint8_t x200 = _addcarryx_u32(0x0, x182, x184, &x199); -{ uint32_t x202; uint8_t x203 = _addcarryx_u32(x200, x185, x187, &x202); -{ uint32_t x205; uint8_t x206 = _addcarryx_u32(x203, x188, x190, &x205); -{ uint32_t x208; uint8_t x209 = _addcarryx_u32(x206, x191, x193, &x208); -{ uint32_t x211; uint8_t x212 = _addcarryx_u32(x209, x194, x196, &x211); -{ uint32_t x214; uint8_t _ = _addcarryx_u32(0x0, x212, x197, &x214); -{ uint32_t _; uint8_t x218 = _addcarryx_u32(0x0, x157, x181, &_); -{ uint32_t x220; uint8_t x221 = _addcarryx_u32(x218, x160, x199, &x220); -{ uint32_t x223; uint8_t x224 = _addcarryx_u32(x221, x163, x202, &x223); -{ uint32_t x226; uint8_t x227 = _addcarryx_u32(x224, x166, x205, &x226); -{ uint32_t x229; uint8_t x230 = _addcarryx_u32(x227, x169, x208, &x229); -{ uint32_t x232; uint8_t x233 = _addcarryx_u32(x230, x172, x211, &x232); -{ uint32_t x235; uint8_t x236 = _addcarryx_u32(x233, x175, x214, &x235); -{ uint8_t x237 = (x236 + x176); -{ uint32_t x240; uint32_t x239 = _mulx_u32(x9, x15, &x240); -{ uint32_t x243; uint32_t x242 = _mulx_u32(x9, x17, &x243); -{ uint32_t x246; uint32_t x245 = _mulx_u32(x9, x19, &x246); -{ uint32_t x249; uint32_t x248 = _mulx_u32(x9, x21, &x249); -{ uint32_t x252; uint32_t x251 = _mulx_u32(x9, x23, &x252); -{ uint32_t x255; uint32_t x254 = _mulx_u32(x9, x22, &x255); -{ uint32_t x257; uint8_t x258 = _addcarryx_u32(0x0, x240, x242, &x257); -{ uint32_t x260; uint8_t x261 = _addcarryx_u32(x258, x243, x245, &x260); -{ uint32_t x263; uint8_t x264 = _addcarryx_u32(x261, x246, x248, &x263); -{ uint32_t x266; uint8_t x267 = _addcarryx_u32(x264, x249, x251, &x266); -{ uint32_t x269; uint8_t x270 = _addcarryx_u32(x267, x252, x254, &x269); -{ uint32_t x272; uint8_t _ = _addcarryx_u32(0x0, x270, x255, &x272); -{ uint32_t x275; uint8_t x276 = _addcarryx_u32(0x0, x220, x239, &x275); -{ uint32_t x278; uint8_t x279 = _addcarryx_u32(x276, x223, x257, &x278); -{ uint32_t x281; uint8_t x282 = _addcarryx_u32(x279, x226, x260, &x281); -{ uint32_t x284; uint8_t x285 = _addcarryx_u32(x282, x229, x263, &x284); -{ uint32_t x287; uint8_t x288 = _addcarryx_u32(x285, x232, x266, &x287); -{ uint32_t x290; uint8_t x291 = _addcarryx_u32(x288, x235, x269, &x290); -{ uint32_t x293; uint8_t x294 = _addcarryx_u32(x291, x237, x272, &x293); -{ uint32_t _; uint32_t x296 = _mulx_u32(x275, 0xf0f0f0f1, &_); -{ uint32_t x300; uint32_t x299 = _mulx_u32(x296, 0xffffffef, &x300); -{ uint32_t x303; uint32_t x302 = _mulx_u32(x296, 0xffffffff, &x303); -{ uint32_t x306; uint32_t x305 = _mulx_u32(x296, 0xffffffff, &x306); -{ uint32_t x309; uint32_t x308 = _mulx_u32(x296, 0xffffffff, &x309); -{ uint32_t x312; uint32_t x311 = _mulx_u32(x296, 0xffffffff, &x312); -{ uint32_t x315; uint32_t x314 = _mulx_u32(x296, 0x3fff, &x315); -{ uint32_t x317; uint8_t x318 = _addcarryx_u32(0x0, x300, x302, &x317); -{ uint32_t x320; uint8_t x321 = _addcarryx_u32(x318, x303, x305, &x320); -{ uint32_t x323; uint8_t x324 = _addcarryx_u32(x321, x306, x308, &x323); -{ uint32_t x326; uint8_t x327 = _addcarryx_u32(x324, x309, x311, &x326); -{ uint32_t x329; uint8_t x330 = _addcarryx_u32(x327, x312, x314, &x329); -{ uint32_t x332; uint8_t _ = _addcarryx_u32(0x0, x330, x315, &x332); -{ uint32_t _; uint8_t x336 = _addcarryx_u32(0x0, x275, x299, &_); -{ uint32_t x338; uint8_t x339 = _addcarryx_u32(x336, x278, x317, &x338); -{ uint32_t x341; uint8_t x342 = _addcarryx_u32(x339, x281, x320, &x341); -{ uint32_t x344; uint8_t x345 = _addcarryx_u32(x342, x284, x323, &x344); -{ uint32_t x347; uint8_t x348 = _addcarryx_u32(x345, x287, x326, &x347); -{ uint32_t x350; uint8_t x351 = _addcarryx_u32(x348, x290, x329, &x350); -{ uint32_t x353; uint8_t x354 = _addcarryx_u32(x351, x293, x332, &x353); -{ uint8_t x355 = (x354 + x294); -{ uint32_t x358; uint32_t x357 = _mulx_u32(x11, x15, &x358); -{ uint32_t x361; uint32_t x360 = _mulx_u32(x11, x17, &x361); -{ uint32_t x364; uint32_t x363 = _mulx_u32(x11, x19, &x364); -{ uint32_t x367; uint32_t x366 = _mulx_u32(x11, x21, &x367); -{ uint32_t x370; uint32_t x369 = _mulx_u32(x11, x23, &x370); -{ uint32_t x373; uint32_t x372 = _mulx_u32(x11, x22, &x373); -{ uint32_t x375; uint8_t x376 = _addcarryx_u32(0x0, x358, x360, &x375); -{ uint32_t x378; uint8_t x379 = _addcarryx_u32(x376, x361, x363, &x378); -{ uint32_t x381; uint8_t x382 = _addcarryx_u32(x379, x364, x366, &x381); -{ uint32_t x384; uint8_t x385 = _addcarryx_u32(x382, x367, x369, &x384); -{ uint32_t x387; uint8_t x388 = _addcarryx_u32(x385, x370, x372, &x387); -{ uint32_t x390; uint8_t _ = _addcarryx_u32(0x0, x388, x373, &x390); -{ uint32_t x393; uint8_t x394 = _addcarryx_u32(0x0, x338, x357, &x393); -{ uint32_t x396; uint8_t x397 = _addcarryx_u32(x394, x341, x375, &x396); -{ uint32_t x399; uint8_t x400 = _addcarryx_u32(x397, x344, x378, &x399); -{ uint32_t x402; uint8_t x403 = _addcarryx_u32(x400, x347, x381, &x402); -{ uint32_t x405; uint8_t x406 = _addcarryx_u32(x403, x350, x384, &x405); -{ uint32_t x408; uint8_t x409 = _addcarryx_u32(x406, x353, x387, &x408); -{ uint32_t x411; uint8_t x412 = _addcarryx_u32(x409, x355, x390, &x411); -{ uint32_t _; uint32_t x414 = _mulx_u32(x393, 0xf0f0f0f1, &_); -{ uint32_t x418; uint32_t x417 = _mulx_u32(x414, 0xffffffef, &x418); -{ uint32_t x421; uint32_t x420 = _mulx_u32(x414, 0xffffffff, &x421); -{ uint32_t x424; uint32_t x423 = _mulx_u32(x414, 0xffffffff, &x424); -{ uint32_t x427; uint32_t x426 = _mulx_u32(x414, 0xffffffff, &x427); -{ uint32_t x430; uint32_t x429 = _mulx_u32(x414, 0xffffffff, &x430); -{ uint32_t x433; uint32_t x432 = _mulx_u32(x414, 0x3fff, &x433); -{ uint32_t x435; uint8_t x436 = _addcarryx_u32(0x0, x418, x420, &x435); -{ uint32_t x438; uint8_t x439 = _addcarryx_u32(x436, x421, x423, &x438); -{ uint32_t x441; uint8_t x442 = _addcarryx_u32(x439, x424, x426, &x441); -{ uint32_t x444; uint8_t x445 = _addcarryx_u32(x442, x427, x429, &x444); -{ uint32_t x447; uint8_t x448 = _addcarryx_u32(x445, x430, x432, &x447); -{ uint32_t x450; uint8_t _ = _addcarryx_u32(0x0, x448, x433, &x450); -{ uint32_t _; uint8_t x454 = _addcarryx_u32(0x0, x393, x417, &_); -{ uint32_t x456; uint8_t x457 = _addcarryx_u32(x454, x396, x435, &x456); -{ uint32_t x459; uint8_t x460 = _addcarryx_u32(x457, x399, x438, &x459); -{ uint32_t x462; uint8_t x463 = _addcarryx_u32(x460, x402, x441, &x462); -{ uint32_t x465; uint8_t x466 = _addcarryx_u32(x463, x405, x444, &x465); -{ uint32_t x468; uint8_t x469 = _addcarryx_u32(x466, x408, x447, &x468); -{ uint32_t x471; uint8_t x472 = _addcarryx_u32(x469, x411, x450, &x471); -{ uint8_t x473 = (x472 + x412); -{ uint32_t x476; uint32_t x475 = _mulx_u32(x13, x15, &x476); -{ uint32_t x479; uint32_t x478 = _mulx_u32(x13, x17, &x479); -{ uint32_t x482; uint32_t x481 = _mulx_u32(x13, x19, &x482); -{ uint32_t x485; uint32_t x484 = _mulx_u32(x13, x21, &x485); -{ uint32_t x488; uint32_t x487 = _mulx_u32(x13, x23, &x488); -{ uint32_t x491; uint32_t x490 = _mulx_u32(x13, x22, &x491); -{ uint32_t x493; uint8_t x494 = _addcarryx_u32(0x0, x476, x478, &x493); -{ uint32_t x496; uint8_t x497 = _addcarryx_u32(x494, x479, x481, &x496); -{ uint32_t x499; uint8_t x500 = _addcarryx_u32(x497, x482, x484, &x499); -{ uint32_t x502; uint8_t x503 = _addcarryx_u32(x500, x485, x487, &x502); -{ uint32_t x505; uint8_t x506 = _addcarryx_u32(x503, x488, x490, &x505); -{ uint32_t x508; uint8_t _ = _addcarryx_u32(0x0, x506, x491, &x508); -{ uint32_t x511; uint8_t x512 = _addcarryx_u32(0x0, x456, x475, &x511); -{ uint32_t x514; uint8_t x515 = _addcarryx_u32(x512, x459, x493, &x514); -{ uint32_t x517; uint8_t x518 = _addcarryx_u32(x515, x462, x496, &x517); -{ uint32_t x520; uint8_t x521 = _addcarryx_u32(x518, x465, x499, &x520); -{ uint32_t x523; uint8_t x524 = _addcarryx_u32(x521, x468, x502, &x523); -{ uint32_t x526; uint8_t x527 = _addcarryx_u32(x524, x471, x505, &x526); -{ uint32_t x529; uint8_t x530 = _addcarryx_u32(x527, x473, x508, &x529); -{ uint32_t _; uint32_t x532 = _mulx_u32(x511, 0xf0f0f0f1, &_); -{ uint32_t x536; uint32_t x535 = _mulx_u32(x532, 0xffffffef, &x536); -{ uint32_t x539; uint32_t x538 = _mulx_u32(x532, 0xffffffff, &x539); -{ uint32_t x542; uint32_t x541 = _mulx_u32(x532, 0xffffffff, &x542); -{ uint32_t x545; uint32_t x544 = _mulx_u32(x532, 0xffffffff, &x545); -{ uint32_t x548; uint32_t x547 = _mulx_u32(x532, 0xffffffff, &x548); -{ uint32_t x551; uint32_t x550 = _mulx_u32(x532, 0x3fff, &x551); -{ uint32_t x553; uint8_t x554 = _addcarryx_u32(0x0, x536, x538, &x553); -{ uint32_t x556; uint8_t x557 = _addcarryx_u32(x554, x539, x541, &x556); -{ uint32_t x559; uint8_t x560 = _addcarryx_u32(x557, x542, x544, &x559); -{ uint32_t x562; uint8_t x563 = _addcarryx_u32(x560, x545, x547, &x562); -{ uint32_t x565; uint8_t x566 = _addcarryx_u32(x563, x548, x550, &x565); -{ uint32_t x568; uint8_t _ = _addcarryx_u32(0x0, x566, x551, &x568); -{ uint32_t _; uint8_t x572 = _addcarryx_u32(0x0, x511, x535, &_); -{ uint32_t x574; uint8_t x575 = _addcarryx_u32(x572, x514, x553, &x574); -{ uint32_t x577; uint8_t x578 = _addcarryx_u32(x575, x517, x556, &x577); -{ uint32_t x580; uint8_t x581 = _addcarryx_u32(x578, x520, x559, &x580); -{ uint32_t x583; uint8_t x584 = _addcarryx_u32(x581, x523, x562, &x583); -{ uint32_t x586; uint8_t x587 = _addcarryx_u32(x584, x526, x565, &x586); -{ uint32_t x589; uint8_t x590 = _addcarryx_u32(x587, x529, x568, &x589); -{ uint8_t x591 = (x590 + x530); -{ uint32_t x594; uint32_t x593 = _mulx_u32(x12, x15, &x594); -{ uint32_t x597; uint32_t x596 = _mulx_u32(x12, x17, &x597); -{ uint32_t x600; uint32_t x599 = _mulx_u32(x12, x19, &x600); -{ uint32_t x603; uint32_t x602 = _mulx_u32(x12, x21, &x603); -{ uint32_t x606; uint32_t x605 = _mulx_u32(x12, x23, &x606); -{ uint32_t x609; uint32_t x608 = _mulx_u32(x12, x22, &x609); -{ uint32_t x611; uint8_t x612 = _addcarryx_u32(0x0, x594, x596, &x611); -{ uint32_t x614; uint8_t x615 = _addcarryx_u32(x612, x597, x599, &x614); -{ uint32_t x617; uint8_t x618 = _addcarryx_u32(x615, x600, x602, &x617); -{ uint32_t x620; uint8_t x621 = _addcarryx_u32(x618, x603, x605, &x620); -{ uint32_t x623; uint8_t x624 = _addcarryx_u32(x621, x606, x608, &x623); -{ uint32_t x626; uint8_t _ = _addcarryx_u32(0x0, x624, x609, &x626); -{ uint32_t x629; uint8_t x630 = _addcarryx_u32(0x0, x574, x593, &x629); -{ uint32_t x632; uint8_t x633 = _addcarryx_u32(x630, x577, x611, &x632); -{ uint32_t x635; uint8_t x636 = _addcarryx_u32(x633, x580, x614, &x635); -{ uint32_t x638; uint8_t x639 = _addcarryx_u32(x636, x583, x617, &x638); -{ uint32_t x641; uint8_t x642 = _addcarryx_u32(x639, x586, x620, &x641); -{ uint32_t x644; uint8_t x645 = _addcarryx_u32(x642, x589, x623, &x644); -{ uint32_t x647; uint8_t x648 = _addcarryx_u32(x645, x591, x626, &x647); -{ uint32_t _; uint32_t x650 = _mulx_u32(x629, 0xf0f0f0f1, &_); -{ uint32_t x654; uint32_t x653 = _mulx_u32(x650, 0xffffffef, &x654); -{ uint32_t x657; uint32_t x656 = _mulx_u32(x650, 0xffffffff, &x657); -{ uint32_t x660; uint32_t x659 = _mulx_u32(x650, 0xffffffff, &x660); -{ uint32_t x663; uint32_t x662 = _mulx_u32(x650, 0xffffffff, &x663); -{ uint32_t x666; uint32_t x665 = _mulx_u32(x650, 0xffffffff, &x666); -{ uint32_t x669; uint32_t x668 = _mulx_u32(x650, 0x3fff, &x669); -{ uint32_t x671; uint8_t x672 = _addcarryx_u32(0x0, x654, x656, &x671); -{ uint32_t x674; uint8_t x675 = _addcarryx_u32(x672, x657, x659, &x674); -{ uint32_t x677; uint8_t x678 = _addcarryx_u32(x675, x660, x662, &x677); -{ uint32_t x680; uint8_t x681 = _addcarryx_u32(x678, x663, x665, &x680); -{ uint32_t x683; uint8_t x684 = _addcarryx_u32(x681, x666, x668, &x683); -{ uint32_t x686; uint8_t _ = _addcarryx_u32(0x0, x684, x669, &x686); -{ uint32_t _; uint8_t x690 = _addcarryx_u32(0x0, x629, x653, &_); -{ uint32_t x692; uint8_t x693 = _addcarryx_u32(x690, x632, x671, &x692); -{ uint32_t x695; uint8_t x696 = _addcarryx_u32(x693, x635, x674, &x695); -{ uint32_t x698; uint8_t x699 = _addcarryx_u32(x696, x638, x677, &x698); -{ uint32_t x701; uint8_t x702 = _addcarryx_u32(x699, x641, x680, &x701); -{ uint32_t x704; uint8_t x705 = _addcarryx_u32(x702, x644, x683, &x704); -{ uint32_t x707; uint8_t x708 = _addcarryx_u32(x705, x647, x686, &x707); -{ uint8_t x709 = (x708 + x648); -{ uint32_t x711; uint8_t x712 = _subborrow_u32(0x0, x692, 0xffffffef, &x711); -{ uint32_t x714; uint8_t x715 = _subborrow_u32(x712, x695, 0xffffffff, &x714); -{ uint32_t x717; uint8_t x718 = _subborrow_u32(x715, x698, 0xffffffff, &x717); -{ uint32_t x720; uint8_t x721 = _subborrow_u32(x718, x701, 0xffffffff, &x720); -{ uint32_t x723; uint8_t x724 = _subborrow_u32(x721, x704, 0xffffffff, &x723); -{ uint32_t x726; uint8_t x727 = _subborrow_u32(x724, x707, 0x3fff, &x726); -{ uint32_t _; uint8_t x730 = _subborrow_u32(x727, x709, 0x0, &_); -{ uint32_t x731 = cmovznz(x730, x726, x707); -{ uint32_t x732 = cmovznz(x730, x723, x704); -{ uint32_t x733 = cmovznz(x730, x720, x701); -{ uint32_t x734 = cmovznz(x730, x717, x698); -{ uint32_t x735 = cmovznz(x730, x714, x695); -{ uint32_t x736 = cmovznz(x730, x711, x692); -out[0] = x731; -out[1] = x732; -out[2] = x733; -out[3] = x734; -out[4] = x735; -out[5] = x736; -}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}} -// caller: uint64_t out[6]; +static void femul(uint32_t out[6], const uint32_t in1[6], const uint32_t in2[6]) { + { const uint32_t x12 = in1[5]; + { const uint32_t x13 = in1[4]; + { const uint32_t x11 = in1[3]; + { const uint32_t x9 = in1[2]; + { const uint32_t x7 = in1[1]; + { const uint32_t x5 = in1[0]; + { const uint32_t x22 = in2[5]; + { const uint32_t x23 = in2[4]; + { const uint32_t x21 = in2[3]; + { const uint32_t x19 = in2[2]; + { const uint32_t x17 = in2[1]; + { const uint32_t x15 = in2[0]; + { uint32_t x26; uint32_t x25 = _mulx_u32(x5, x15, &x26); + { uint32_t x29; uint32_t x28 = _mulx_u32(x5, x17, &x29); + { uint32_t x32; uint32_t x31 = _mulx_u32(x5, x19, &x32); + { uint32_t x35; uint32_t x34 = _mulx_u32(x5, x21, &x35); + { uint32_t x38; uint32_t x37 = _mulx_u32(x5, x23, &x38); + { uint32_t x41; uint32_t x40 = _mulx_u32(x5, x22, &x41); + { uint32_t x43; uint8_t x44 = _addcarryx_u32(0x0, x26, x28, &x43); + { uint32_t x46; uint8_t x47 = _addcarryx_u32(x44, x29, x31, &x46); + { uint32_t x49; uint8_t x50 = _addcarryx_u32(x47, x32, x34, &x49); + { uint32_t x52; uint8_t x53 = _addcarryx_u32(x50, x35, x37, &x52); + { uint32_t x55; uint8_t x56 = _addcarryx_u32(x53, x38, x40, &x55); + { uint32_t x58; uint8_t _ = _addcarryx_u32(0x0, x56, x41, &x58); + { uint32_t _; uint32_t x61 = _mulx_u32(x25, 0xf0f0f0f1, &_); + { uint32_t x65; uint32_t x64 = _mulx_u32(x61, 0xffffffef, &x65); + { uint32_t x68; uint32_t x67 = _mulx_u32(x61, 0xffffffff, &x68); + { uint32_t x71; uint32_t x70 = _mulx_u32(x61, 0xffffffff, &x71); + { uint32_t x74; uint32_t x73 = _mulx_u32(x61, 0xffffffff, &x74); + { uint32_t x77; uint32_t x76 = _mulx_u32(x61, 0xffffffff, &x77); + { uint32_t x80; uint32_t x79 = _mulx_u32(x61, 0x3fff, &x80); + { uint32_t x82; uint8_t x83 = _addcarryx_u32(0x0, x65, x67, &x82); + { uint32_t x85; uint8_t x86 = _addcarryx_u32(x83, x68, x70, &x85); + { uint32_t x88; uint8_t x89 = _addcarryx_u32(x86, x71, x73, &x88); + { uint32_t x91; uint8_t x92 = _addcarryx_u32(x89, x74, x76, &x91); + { uint32_t x94; uint8_t x95 = _addcarryx_u32(x92, x77, x79, &x94); + { uint32_t x97; uint8_t _ = _addcarryx_u32(0x0, x95, x80, &x97); + { uint32_t _; uint8_t x101 = _addcarryx_u32(0x0, x25, x64, &_); + { uint32_t x103; uint8_t x104 = _addcarryx_u32(x101, x43, x82, &x103); + { uint32_t x106; uint8_t x107 = _addcarryx_u32(x104, x46, x85, &x106); + { uint32_t x109; uint8_t x110 = _addcarryx_u32(x107, x49, x88, &x109); + { uint32_t x112; uint8_t x113 = _addcarryx_u32(x110, x52, x91, &x112); + { uint32_t x115; uint8_t x116 = _addcarryx_u32(x113, x55, x94, &x115); + { uint32_t x118; uint8_t x119 = _addcarryx_u32(x116, x58, x97, &x118); + { uint32_t x122; uint32_t x121 = _mulx_u32(x7, x15, &x122); + { uint32_t x125; uint32_t x124 = _mulx_u32(x7, x17, &x125); + { uint32_t x128; uint32_t x127 = _mulx_u32(x7, x19, &x128); + { uint32_t x131; uint32_t x130 = _mulx_u32(x7, x21, &x131); + { uint32_t x134; uint32_t x133 = _mulx_u32(x7, x23, &x134); + { uint32_t x137; uint32_t x136 = _mulx_u32(x7, x22, &x137); + { uint32_t x139; uint8_t x140 = _addcarryx_u32(0x0, x122, x124, &x139); + { uint32_t x142; uint8_t x143 = _addcarryx_u32(x140, x125, x127, &x142); + { uint32_t x145; uint8_t x146 = _addcarryx_u32(x143, x128, x130, &x145); + { uint32_t x148; uint8_t x149 = _addcarryx_u32(x146, x131, x133, &x148); + { uint32_t x151; uint8_t x152 = _addcarryx_u32(x149, x134, x136, &x151); + { uint32_t x154; uint8_t _ = _addcarryx_u32(0x0, x152, x137, &x154); + { uint32_t x157; uint8_t x158 = _addcarryx_u32(0x0, x103, x121, &x157); + { uint32_t x160; uint8_t x161 = _addcarryx_u32(x158, x106, x139, &x160); + { uint32_t x163; uint8_t x164 = _addcarryx_u32(x161, x109, x142, &x163); + { uint32_t x166; uint8_t x167 = _addcarryx_u32(x164, x112, x145, &x166); + { uint32_t x169; uint8_t x170 = _addcarryx_u32(x167, x115, x148, &x169); + { uint32_t x172; uint8_t x173 = _addcarryx_u32(x170, x118, x151, &x172); + { uint32_t x175; uint8_t x176 = _addcarryx_u32(x173, x119, x154, &x175); + { uint32_t _; uint32_t x178 = _mulx_u32(x157, 0xf0f0f0f1, &_); + { uint32_t x182; uint32_t x181 = _mulx_u32(x178, 0xffffffef, &x182); + { uint32_t x185; uint32_t x184 = _mulx_u32(x178, 0xffffffff, &x185); + { uint32_t x188; uint32_t x187 = _mulx_u32(x178, 0xffffffff, &x188); + { uint32_t x191; uint32_t x190 = _mulx_u32(x178, 0xffffffff, &x191); + { uint32_t x194; uint32_t x193 = _mulx_u32(x178, 0xffffffff, &x194); + { uint32_t x197; uint32_t x196 = _mulx_u32(x178, 0x3fff, &x197); + { uint32_t x199; uint8_t x200 = _addcarryx_u32(0x0, x182, x184, &x199); + { uint32_t x202; uint8_t x203 = _addcarryx_u32(x200, x185, x187, &x202); + { uint32_t x205; uint8_t x206 = _addcarryx_u32(x203, x188, x190, &x205); + { uint32_t x208; uint8_t x209 = _addcarryx_u32(x206, x191, x193, &x208); + { uint32_t x211; uint8_t x212 = _addcarryx_u32(x209, x194, x196, &x211); + { uint32_t x214; uint8_t _ = _addcarryx_u32(0x0, x212, x197, &x214); + { uint32_t _; uint8_t x218 = _addcarryx_u32(0x0, x157, x181, &_); + { uint32_t x220; uint8_t x221 = _addcarryx_u32(x218, x160, x199, &x220); + { uint32_t x223; uint8_t x224 = _addcarryx_u32(x221, x163, x202, &x223); + { uint32_t x226; uint8_t x227 = _addcarryx_u32(x224, x166, x205, &x226); + { uint32_t x229; uint8_t x230 = _addcarryx_u32(x227, x169, x208, &x229); + { uint32_t x232; uint8_t x233 = _addcarryx_u32(x230, x172, x211, &x232); + { uint32_t x235; uint8_t x236 = _addcarryx_u32(x233, x175, x214, &x235); + { uint8_t x237 = (x236 + x176); + { uint32_t x240; uint32_t x239 = _mulx_u32(x9, x15, &x240); + { uint32_t x243; uint32_t x242 = _mulx_u32(x9, x17, &x243); + { uint32_t x246; uint32_t x245 = _mulx_u32(x9, x19, &x246); + { uint32_t x249; uint32_t x248 = _mulx_u32(x9, x21, &x249); + { uint32_t x252; uint32_t x251 = _mulx_u32(x9, x23, &x252); + { uint32_t x255; uint32_t x254 = _mulx_u32(x9, x22, &x255); + { uint32_t x257; uint8_t x258 = _addcarryx_u32(0x0, x240, x242, &x257); + { uint32_t x260; uint8_t x261 = _addcarryx_u32(x258, x243, x245, &x260); + { uint32_t x263; uint8_t x264 = _addcarryx_u32(x261, x246, x248, &x263); + { uint32_t x266; uint8_t x267 = _addcarryx_u32(x264, x249, x251, &x266); + { uint32_t x269; uint8_t x270 = _addcarryx_u32(x267, x252, x254, &x269); + { uint32_t x272; uint8_t _ = _addcarryx_u32(0x0, x270, x255, &x272); + { uint32_t x275; uint8_t x276 = _addcarryx_u32(0x0, x220, x239, &x275); + { uint32_t x278; uint8_t x279 = _addcarryx_u32(x276, x223, x257, &x278); + { uint32_t x281; uint8_t x282 = _addcarryx_u32(x279, x226, x260, &x281); + { uint32_t x284; uint8_t x285 = _addcarryx_u32(x282, x229, x263, &x284); + { uint32_t x287; uint8_t x288 = _addcarryx_u32(x285, x232, x266, &x287); + { uint32_t x290; uint8_t x291 = _addcarryx_u32(x288, x235, x269, &x290); + { uint32_t x293; uint8_t x294 = _addcarryx_u32(x291, x237, x272, &x293); + { uint32_t _; uint32_t x296 = _mulx_u32(x275, 0xf0f0f0f1, &_); + { uint32_t x300; uint32_t x299 = _mulx_u32(x296, 0xffffffef, &x300); + { uint32_t x303; uint32_t x302 = _mulx_u32(x296, 0xffffffff, &x303); + { uint32_t x306; uint32_t x305 = _mulx_u32(x296, 0xffffffff, &x306); + { uint32_t x309; uint32_t x308 = _mulx_u32(x296, 0xffffffff, &x309); + { uint32_t x312; uint32_t x311 = _mulx_u32(x296, 0xffffffff, &x312); + { uint32_t x315; uint32_t x314 = _mulx_u32(x296, 0x3fff, &x315); + { uint32_t x317; uint8_t x318 = _addcarryx_u32(0x0, x300, x302, &x317); + { uint32_t x320; uint8_t x321 = _addcarryx_u32(x318, x303, x305, &x320); + { uint32_t x323; uint8_t x324 = _addcarryx_u32(x321, x306, x308, &x323); + { uint32_t x326; uint8_t x327 = _addcarryx_u32(x324, x309, x311, &x326); + { uint32_t x329; uint8_t x330 = _addcarryx_u32(x327, x312, x314, &x329); + { uint32_t x332; uint8_t _ = _addcarryx_u32(0x0, x330, x315, &x332); + { uint32_t _; uint8_t x336 = _addcarryx_u32(0x0, x275, x299, &_); + { uint32_t x338; uint8_t x339 = _addcarryx_u32(x336, x278, x317, &x338); + { uint32_t x341; uint8_t x342 = _addcarryx_u32(x339, x281, x320, &x341); + { uint32_t x344; uint8_t x345 = _addcarryx_u32(x342, x284, x323, &x344); + { uint32_t x347; uint8_t x348 = _addcarryx_u32(x345, x287, x326, &x347); + { uint32_t x350; uint8_t x351 = _addcarryx_u32(x348, x290, x329, &x350); + { uint32_t x353; uint8_t x354 = _addcarryx_u32(x351, x293, x332, &x353); + { uint8_t x355 = (x354 + x294); + { uint32_t x358; uint32_t x357 = _mulx_u32(x11, x15, &x358); + { uint32_t x361; uint32_t x360 = _mulx_u32(x11, x17, &x361); + { uint32_t x364; uint32_t x363 = _mulx_u32(x11, x19, &x364); + { uint32_t x367; uint32_t x366 = _mulx_u32(x11, x21, &x367); + { uint32_t x370; uint32_t x369 = _mulx_u32(x11, x23, &x370); + { uint32_t x373; uint32_t x372 = _mulx_u32(x11, x22, &x373); + { uint32_t x375; uint8_t x376 = _addcarryx_u32(0x0, x358, x360, &x375); + { uint32_t x378; uint8_t x379 = _addcarryx_u32(x376, x361, x363, &x378); + { uint32_t x381; uint8_t x382 = _addcarryx_u32(x379, x364, x366, &x381); + { uint32_t x384; uint8_t x385 = _addcarryx_u32(x382, x367, x369, &x384); + { uint32_t x387; uint8_t x388 = _addcarryx_u32(x385, x370, x372, &x387); + { uint32_t x390; uint8_t _ = _addcarryx_u32(0x0, x388, x373, &x390); + { uint32_t x393; uint8_t x394 = _addcarryx_u32(0x0, x338, x357, &x393); + { uint32_t x396; uint8_t x397 = _addcarryx_u32(x394, x341, x375, &x396); + { uint32_t x399; uint8_t x400 = _addcarryx_u32(x397, x344, x378, &x399); + { uint32_t x402; uint8_t x403 = _addcarryx_u32(x400, x347, x381, &x402); + { uint32_t x405; uint8_t x406 = _addcarryx_u32(x403, x350, x384, &x405); + { uint32_t x408; uint8_t x409 = _addcarryx_u32(x406, x353, x387, &x408); + { uint32_t x411; uint8_t x412 = _addcarryx_u32(x409, x355, x390, &x411); + { uint32_t _; uint32_t x414 = _mulx_u32(x393, 0xf0f0f0f1, &_); + { uint32_t x418; uint32_t x417 = _mulx_u32(x414, 0xffffffef, &x418); + { uint32_t x421; uint32_t x420 = _mulx_u32(x414, 0xffffffff, &x421); + { uint32_t x424; uint32_t x423 = _mulx_u32(x414, 0xffffffff, &x424); + { uint32_t x427; uint32_t x426 = _mulx_u32(x414, 0xffffffff, &x427); + { uint32_t x430; uint32_t x429 = _mulx_u32(x414, 0xffffffff, &x430); + { uint32_t x433; uint32_t x432 = _mulx_u32(x414, 0x3fff, &x433); + { uint32_t x435; uint8_t x436 = _addcarryx_u32(0x0, x418, x420, &x435); + { uint32_t x438; uint8_t x439 = _addcarryx_u32(x436, x421, x423, &x438); + { uint32_t x441; uint8_t x442 = _addcarryx_u32(x439, x424, x426, &x441); + { uint32_t x444; uint8_t x445 = _addcarryx_u32(x442, x427, x429, &x444); + { uint32_t x447; uint8_t x448 = _addcarryx_u32(x445, x430, x432, &x447); + { uint32_t x450; uint8_t _ = _addcarryx_u32(0x0, x448, x433, &x450); + { uint32_t _; uint8_t x454 = _addcarryx_u32(0x0, x393, x417, &_); + { uint32_t x456; uint8_t x457 = _addcarryx_u32(x454, x396, x435, &x456); + { uint32_t x459; uint8_t x460 = _addcarryx_u32(x457, x399, x438, &x459); + { uint32_t x462; uint8_t x463 = _addcarryx_u32(x460, x402, x441, &x462); + { uint32_t x465; uint8_t x466 = _addcarryx_u32(x463, x405, x444, &x465); + { uint32_t x468; uint8_t x469 = _addcarryx_u32(x466, x408, x447, &x468); + { uint32_t x471; uint8_t x472 = _addcarryx_u32(x469, x411, x450, &x471); + { uint8_t x473 = (x472 + x412); + { uint32_t x476; uint32_t x475 = _mulx_u32(x13, x15, &x476); + { uint32_t x479; uint32_t x478 = _mulx_u32(x13, x17, &x479); + { uint32_t x482; uint32_t x481 = _mulx_u32(x13, x19, &x482); + { uint32_t x485; uint32_t x484 = _mulx_u32(x13, x21, &x485); + { uint32_t x488; uint32_t x487 = _mulx_u32(x13, x23, &x488); + { uint32_t x491; uint32_t x490 = _mulx_u32(x13, x22, &x491); + { uint32_t x493; uint8_t x494 = _addcarryx_u32(0x0, x476, x478, &x493); + { uint32_t x496; uint8_t x497 = _addcarryx_u32(x494, x479, x481, &x496); + { uint32_t x499; uint8_t x500 = _addcarryx_u32(x497, x482, x484, &x499); + { uint32_t x502; uint8_t x503 = _addcarryx_u32(x500, x485, x487, &x502); + { uint32_t x505; uint8_t x506 = _addcarryx_u32(x503, x488, x490, &x505); + { uint32_t x508; uint8_t _ = _addcarryx_u32(0x0, x506, x491, &x508); + { uint32_t x511; uint8_t x512 = _addcarryx_u32(0x0, x456, x475, &x511); + { uint32_t x514; uint8_t x515 = _addcarryx_u32(x512, x459, x493, &x514); + { uint32_t x517; uint8_t x518 = _addcarryx_u32(x515, x462, x496, &x517); + { uint32_t x520; uint8_t x521 = _addcarryx_u32(x518, x465, x499, &x520); + { uint32_t x523; uint8_t x524 = _addcarryx_u32(x521, x468, x502, &x523); + { uint32_t x526; uint8_t x527 = _addcarryx_u32(x524, x471, x505, &x526); + { uint32_t x529; uint8_t x530 = _addcarryx_u32(x527, x473, x508, &x529); + { uint32_t _; uint32_t x532 = _mulx_u32(x511, 0xf0f0f0f1, &_); + { uint32_t x536; uint32_t x535 = _mulx_u32(x532, 0xffffffef, &x536); + { uint32_t x539; uint32_t x538 = _mulx_u32(x532, 0xffffffff, &x539); + { uint32_t x542; uint32_t x541 = _mulx_u32(x532, 0xffffffff, &x542); + { uint32_t x545; uint32_t x544 = _mulx_u32(x532, 0xffffffff, &x545); + { uint32_t x548; uint32_t x547 = _mulx_u32(x532, 0xffffffff, &x548); + { uint32_t x551; uint32_t x550 = _mulx_u32(x532, 0x3fff, &x551); + { uint32_t x553; uint8_t x554 = _addcarryx_u32(0x0, x536, x538, &x553); + { uint32_t x556; uint8_t x557 = _addcarryx_u32(x554, x539, x541, &x556); + { uint32_t x559; uint8_t x560 = _addcarryx_u32(x557, x542, x544, &x559); + { uint32_t x562; uint8_t x563 = _addcarryx_u32(x560, x545, x547, &x562); + { uint32_t x565; uint8_t x566 = _addcarryx_u32(x563, x548, x550, &x565); + { uint32_t x568; uint8_t _ = _addcarryx_u32(0x0, x566, x551, &x568); + { uint32_t _; uint8_t x572 = _addcarryx_u32(0x0, x511, x535, &_); + { uint32_t x574; uint8_t x575 = _addcarryx_u32(x572, x514, x553, &x574); + { uint32_t x577; uint8_t x578 = _addcarryx_u32(x575, x517, x556, &x577); + { uint32_t x580; uint8_t x581 = _addcarryx_u32(x578, x520, x559, &x580); + { uint32_t x583; uint8_t x584 = _addcarryx_u32(x581, x523, x562, &x583); + { uint32_t x586; uint8_t x587 = _addcarryx_u32(x584, x526, x565, &x586); + { uint32_t x589; uint8_t x590 = _addcarryx_u32(x587, x529, x568, &x589); + { uint8_t x591 = (x590 + x530); + { uint32_t x594; uint32_t x593 = _mulx_u32(x12, x15, &x594); + { uint32_t x597; uint32_t x596 = _mulx_u32(x12, x17, &x597); + { uint32_t x600; uint32_t x599 = _mulx_u32(x12, x19, &x600); + { uint32_t x603; uint32_t x602 = _mulx_u32(x12, x21, &x603); + { uint32_t x606; uint32_t x605 = _mulx_u32(x12, x23, &x606); + { uint32_t x609; uint32_t x608 = _mulx_u32(x12, x22, &x609); + { uint32_t x611; uint8_t x612 = _addcarryx_u32(0x0, x594, x596, &x611); + { uint32_t x614; uint8_t x615 = _addcarryx_u32(x612, x597, x599, &x614); + { uint32_t x617; uint8_t x618 = _addcarryx_u32(x615, x600, x602, &x617); + { uint32_t x620; uint8_t x621 = _addcarryx_u32(x618, x603, x605, &x620); + { uint32_t x623; uint8_t x624 = _addcarryx_u32(x621, x606, x608, &x623); + { uint32_t x626; uint8_t _ = _addcarryx_u32(0x0, x624, x609, &x626); + { uint32_t x629; uint8_t x630 = _addcarryx_u32(0x0, x574, x593, &x629); + { uint32_t x632; uint8_t x633 = _addcarryx_u32(x630, x577, x611, &x632); + { uint32_t x635; uint8_t x636 = _addcarryx_u32(x633, x580, x614, &x635); + { uint32_t x638; uint8_t x639 = _addcarryx_u32(x636, x583, x617, &x638); + { uint32_t x641; uint8_t x642 = _addcarryx_u32(x639, x586, x620, &x641); + { uint32_t x644; uint8_t x645 = _addcarryx_u32(x642, x589, x623, &x644); + { uint32_t x647; uint8_t x648 = _addcarryx_u32(x645, x591, x626, &x647); + { uint32_t _; uint32_t x650 = _mulx_u32(x629, 0xf0f0f0f1, &_); + { uint32_t x654; uint32_t x653 = _mulx_u32(x650, 0xffffffef, &x654); + { uint32_t x657; uint32_t x656 = _mulx_u32(x650, 0xffffffff, &x657); + { uint32_t x660; uint32_t x659 = _mulx_u32(x650, 0xffffffff, &x660); + { uint32_t x663; uint32_t x662 = _mulx_u32(x650, 0xffffffff, &x663); + { uint32_t x666; uint32_t x665 = _mulx_u32(x650, 0xffffffff, &x666); + { uint32_t x669; uint32_t x668 = _mulx_u32(x650, 0x3fff, &x669); + { uint32_t x671; uint8_t x672 = _addcarryx_u32(0x0, x654, x656, &x671); + { uint32_t x674; uint8_t x675 = _addcarryx_u32(x672, x657, x659, &x674); + { uint32_t x677; uint8_t x678 = _addcarryx_u32(x675, x660, x662, &x677); + { uint32_t x680; uint8_t x681 = _addcarryx_u32(x678, x663, x665, &x680); + { uint32_t x683; uint8_t x684 = _addcarryx_u32(x681, x666, x668, &x683); + { uint32_t x686; uint8_t _ = _addcarryx_u32(0x0, x684, x669, &x686); + { uint32_t _; uint8_t x690 = _addcarryx_u32(0x0, x629, x653, &_); + { uint32_t x692; uint8_t x693 = _addcarryx_u32(x690, x632, x671, &x692); + { uint32_t x695; uint8_t x696 = _addcarryx_u32(x693, x635, x674, &x695); + { uint32_t x698; uint8_t x699 = _addcarryx_u32(x696, x638, x677, &x698); + { uint32_t x701; uint8_t x702 = _addcarryx_u32(x699, x641, x680, &x701); + { uint32_t x704; uint8_t x705 = _addcarryx_u32(x702, x644, x683, &x704); + { uint32_t x707; uint8_t x708 = _addcarryx_u32(x705, x647, x686, &x707); + { uint8_t x709 = (x708 + x648); + { uint32_t x711; uint8_t x712 = _subborrow_u32(0x0, x692, 0xffffffef, &x711); + { uint32_t x714; uint8_t x715 = _subborrow_u32(x712, x695, 0xffffffff, &x714); + { uint32_t x717; uint8_t x718 = _subborrow_u32(x715, x698, 0xffffffff, &x717); + { uint32_t x720; uint8_t x721 = _subborrow_u32(x718, x701, 0xffffffff, &x720); + { uint32_t x723; uint8_t x724 = _subborrow_u32(x721, x704, 0xffffffff, &x723); + { uint32_t x726; uint8_t x727 = _subborrow_u32(x724, x707, 0x3fff, &x726); + { uint32_t _; uint8_t x730 = _subborrow_u32(x727, x709, 0x0, &_); + { uint32_t x731 = cmovznz(x730, x726, x707); + { uint32_t x732 = cmovznz(x730, x723, x704); + { uint32_t x733 = cmovznz(x730, x720, x701); + { uint32_t x734 = cmovznz(x730, x717, x698); + { uint32_t x735 = cmovznz(x730, x714, x695); + { uint32_t x736 = cmovznz(x730, x711, x692); + out[0] = x736; + out[1] = x735; + out[2] = x734; + out[3] = x733; + out[4] = x732; + out[5] = x731; + }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}} +} diff --git a/src/Specific/montgomery32_2e174m17/fenz.c b/src/Specific/montgomery32_2e174m17/fenz.c index 6d8132b20..2e0454af1 100644 --- a/src/Specific/montgomery32_2e174m17/fenz.c +++ b/src/Specific/montgomery32_2e174m17/fenz.c @@ -1,27 +1,15 @@ -#include <stdint.h> -#include <stdbool.h> -#include <x86intrin.h> -#include "liblow.h" - -#include "fenz.h" - -typedef unsigned int uint128_t __attribute__((mode(TI))); - -#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER)) -// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294 -#define _subborrow_u32 __builtin_ia32_sbb_u32 -#define _subborrow_u64 __builtin_ia32_sbb_u64 -#endif - -#undef force_inline -#define force_inline __attribute__((always_inline)) - -void force_inline fenz(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) -{ uint32_t x11 = (x10 | x9); -{ uint32_t x12 = (x8 | x11); -{ uint32_t x13 = (x6 | x12); -{ uint32_t x14 = (x4 | x13); -{ uint32_t x15 = (x2 | x14); -out[0] = x15; -}}}}} -// caller: uint64_t out[1]; +static void fenz(ReturnType uint32_t out[1], const uint32_t in1[6]) { + { const uint32_t x9 = in1[5]; + { const uint32_t x10 = in1[4]; + { const uint32_t x8 = in1[3]; + { const uint32_t x6 = in1[2]; + { const uint32_t x4 = in1[1]; + { const uint32_t x2 = in1[0]; + { uint32_t x11 = (x10 | x9); + { uint32_t x12 = (x8 | x11); + { uint32_t x13 = (x6 | x12); + { uint32_t x14 = (x4 | x13); + { uint32_t x15 = (x2 | x14); + out[0] = x15; + }}}}}}}}}}} +} diff --git a/src/Specific/montgomery32_2e174m17/feopp.c b/src/Specific/montgomery32_2e174m17/feopp.c index a5c384418..c4e407fe1 100644 --- a/src/Specific/montgomery32_2e174m17/feopp.c +++ b/src/Specific/montgomery32_2e174m17/feopp.c @@ -1,46 +1,34 @@ -#include <stdint.h> -#include <stdbool.h> -#include <x86intrin.h> -#include "liblow.h" - -#include "feopp.h" - -typedef unsigned int uint128_t __attribute__((mode(TI))); - -#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER)) -// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294 -#define _subborrow_u32 __builtin_ia32_sbb_u32 -#define _subborrow_u64 __builtin_ia32_sbb_u64 -#endif - -#undef force_inline -#define force_inline __attribute__((always_inline)) - -void force_inline feopp(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) -{ uint32_t x12; uint8_t x13 = _subborrow_u32(0x0, 0x0, x2, &x12); -{ uint32_t x15; uint8_t x16 = _subborrow_u32(x13, 0x0, x4, &x15); -{ uint32_t x18; uint8_t x19 = _subborrow_u32(x16, 0x0, x6, &x18); -{ uint32_t x21; uint8_t x22 = _subborrow_u32(x19, 0x0, x8, &x21); -{ uint32_t x24; uint8_t x25 = _subborrow_u32(x22, 0x0, x10, &x24); -{ uint32_t x27; uint8_t x28 = _subborrow_u32(x25, 0x0, x9, &x27); -{ uint32_t x29 = (uint32_t)cmovznz(x28, 0x0, 0xffffffff); -{ uint32_t x30 = (x29 & 0xffffffef); -{ uint32_t x32; uint8_t x33 = _addcarryx_u32(0x0, x12, x30, &x32); -{ uint32_t x34 = (x29 & 0xffffffff); -{ uint32_t x36; uint8_t x37 = _addcarryx_u32(x33, x15, x34, &x36); -{ uint32_t x38 = (x29 & 0xffffffff); -{ uint32_t x40; uint8_t x41 = _addcarryx_u32(x37, x18, x38, &x40); -{ uint32_t x42 = (x29 & 0xffffffff); -{ uint32_t x44; uint8_t x45 = _addcarryx_u32(x41, x21, x42, &x44); -{ uint32_t x46 = (x29 & 0xffffffff); -{ uint32_t x48; uint8_t x49 = _addcarryx_u32(x45, x24, x46, &x48); -{ uint32_t x50 = (x29 & 0x3fff); -{ uint32_t x52; uint8_t _ = _addcarryx_u32(x49, x27, x50, &x52); -out[0] = x52; -out[1] = x48; -out[2] = x44; -out[3] = x40; -out[4] = x36; -out[5] = x32; -}}}}}}}}}}}}}}}}}}} -// caller: uint64_t out[6]; +static void feopp(uint32_t out[6], const uint32_t in1[6]) { + { const uint32_t x9 = in1[5]; + { const uint32_t x10 = in1[4]; + { const uint32_t x8 = in1[3]; + { const uint32_t x6 = in1[2]; + { const uint32_t x4 = in1[1]; + { const uint32_t x2 = in1[0]; + { uint32_t x12; uint8_t x13 = _subborrow_u32(0x0, 0x0, x2, &x12); + { uint32_t x15; uint8_t x16 = _subborrow_u32(x13, 0x0, x4, &x15); + { uint32_t x18; uint8_t x19 = _subborrow_u32(x16, 0x0, x6, &x18); + { uint32_t x21; uint8_t x22 = _subborrow_u32(x19, 0x0, x8, &x21); + { uint32_t x24; uint8_t x25 = _subborrow_u32(x22, 0x0, x10, &x24); + { uint32_t x27; uint8_t x28 = _subborrow_u32(x25, 0x0, x9, &x27); + { uint32_t x29 = (uint32_t)cmovznz(x28, 0x0, 0xffffffff); + { uint32_t x30 = (x29 & 0xffffffef); + { uint32_t x32; uint8_t x33 = _addcarryx_u32(0x0, x12, x30, &x32); + { uint32_t x34 = (x29 & 0xffffffff); + { uint32_t x36; uint8_t x37 = _addcarryx_u32(x33, x15, x34, &x36); + { uint32_t x38 = (x29 & 0xffffffff); + { uint32_t x40; uint8_t x41 = _addcarryx_u32(x37, x18, x38, &x40); + { uint32_t x42 = (x29 & 0xffffffff); + { uint32_t x44; uint8_t x45 = _addcarryx_u32(x41, x21, x42, &x44); + { uint32_t x46 = (x29 & 0xffffffff); + { uint32_t x48; uint8_t x49 = _addcarryx_u32(x45, x24, x46, &x48); + { uint32_t x50 = (x29 & 0x3fff); + { uint32_t x52; uint8_t _ = _addcarryx_u32(x49, x27, x50, &x52); + out[0] = x32; + out[1] = x36; + out[2] = x40; + out[3] = x44; + out[4] = x48; + out[5] = x52; + }}}}}}}}}}}}}}}}}}}}}}}}} +} diff --git a/src/Specific/montgomery32_2e174m17/fesub.c b/src/Specific/montgomery32_2e174m17/fesub.c index 1b2b1408e..0e395b59f 100644 --- a/src/Specific/montgomery32_2e174m17/fesub.c +++ b/src/Specific/montgomery32_2e174m17/fesub.c @@ -1,46 +1,40 @@ -#include <stdint.h> -#include <stdbool.h> -#include <x86intrin.h> -#include "liblow.h" - -#include "fesub.h" - -typedef unsigned int uint128_t __attribute__((mode(TI))); - -#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER)) -// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294 -#define _subborrow_u32 __builtin_ia32_sbb_u32 -#define _subborrow_u64 __builtin_ia32_sbb_u64 -#endif - -#undef force_inline -#define force_inline __attribute__((always_inline)) - -void force_inline fesub(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15) -{ uint32_t x25; uint8_t x26 = _subborrow_u32(0x0, x5, x15, &x25); -{ uint32_t x28; uint8_t x29 = _subborrow_u32(x26, x7, x17, &x28); -{ uint32_t x31; uint8_t x32 = _subborrow_u32(x29, x9, x19, &x31); -{ uint32_t x34; uint8_t x35 = _subborrow_u32(x32, x11, x21, &x34); -{ uint32_t x37; uint8_t x38 = _subborrow_u32(x35, x13, x23, &x37); -{ uint32_t x40; uint8_t x41 = _subborrow_u32(x38, x12, x22, &x40); -{ uint32_t x42 = (uint32_t)cmovznz(x41, 0x0, 0xffffffff); -{ uint32_t x43 = (x42 & 0xffffffef); -{ uint32_t x45; uint8_t x46 = _addcarryx_u32(0x0, x25, x43, &x45); -{ uint32_t x47 = (x42 & 0xffffffff); -{ uint32_t x49; uint8_t x50 = _addcarryx_u32(x46, x28, x47, &x49); -{ uint32_t x51 = (x42 & 0xffffffff); -{ uint32_t x53; uint8_t x54 = _addcarryx_u32(x50, x31, x51, &x53); -{ uint32_t x55 = (x42 & 0xffffffff); -{ uint32_t x57; uint8_t x58 = _addcarryx_u32(x54, x34, x55, &x57); -{ uint32_t x59 = (x42 & 0xffffffff); -{ uint32_t x61; uint8_t x62 = _addcarryx_u32(x58, x37, x59, &x61); -{ uint32_t x63 = (x42 & 0x3fff); -{ uint32_t x65; uint8_t _ = _addcarryx_u32(x62, x40, x63, &x65); -out[0] = x65; -out[1] = x61; -out[2] = x57; -out[3] = x53; -out[4] = x49; -out[5] = x45; -}}}}}}}}}}}}}}}}}}} -// caller: uint64_t out[6]; +static void fesub(uint32_t out[6], const uint32_t in1[6], const uint32_t in2[6]) { + { const uint32_t x12 = in1[5]; + { const uint32_t x13 = in1[4]; + { const uint32_t x11 = in1[3]; + { const uint32_t x9 = in1[2]; + { const uint32_t x7 = in1[1]; + { const uint32_t x5 = in1[0]; + { const uint32_t x22 = in2[5]; + { const uint32_t x23 = in2[4]; + { const uint32_t x21 = in2[3]; + { const uint32_t x19 = in2[2]; + { const uint32_t x17 = in2[1]; + { const uint32_t x15 = in2[0]; + { uint32_t x25; uint8_t x26 = _subborrow_u32(0x0, x5, x15, &x25); + { uint32_t x28; uint8_t x29 = _subborrow_u32(x26, x7, x17, &x28); + { uint32_t x31; uint8_t x32 = _subborrow_u32(x29, x9, x19, &x31); + { uint32_t x34; uint8_t x35 = _subborrow_u32(x32, x11, x21, &x34); + { uint32_t x37; uint8_t x38 = _subborrow_u32(x35, x13, x23, &x37); + { uint32_t x40; uint8_t x41 = _subborrow_u32(x38, x12, x22, &x40); + { uint32_t x42 = (uint32_t)cmovznz(x41, 0x0, 0xffffffff); + { uint32_t x43 = (x42 & 0xffffffef); + { uint32_t x45; uint8_t x46 = _addcarryx_u32(0x0, x25, x43, &x45); + { uint32_t x47 = (x42 & 0xffffffff); + { uint32_t x49; uint8_t x50 = _addcarryx_u32(x46, x28, x47, &x49); + { uint32_t x51 = (x42 & 0xffffffff); + { uint32_t x53; uint8_t x54 = _addcarryx_u32(x50, x31, x51, &x53); + { uint32_t x55 = (x42 & 0xffffffff); + { uint32_t x57; uint8_t x58 = _addcarryx_u32(x54, x34, x55, &x57); + { uint32_t x59 = (x42 & 0xffffffff); + { uint32_t x61; uint8_t x62 = _addcarryx_u32(x58, x37, x59, &x61); + { uint32_t x63 = (x42 & 0x3fff); + { uint32_t x65; uint8_t _ = _addcarryx_u32(x62, x40, x63, &x65); + out[0] = x45; + out[1] = x49; + out[2] = x53; + out[3] = x57; + out[4] = x61; + out[5] = x65; + }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}} +} |