aboutsummaryrefslogtreecommitdiff
path: root/src/Specific/montgomery64_2e198m17_4limbs/femul.c
blob: 3e5aad4760f7cbcd54785968e181492c9a81acd0 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
/*
 * femul: multiply two 4-limb values with an interleaved modular reduction.
 *
 * Limbs are 64 bits wide, least-significant limb at index 0. The function is
 * straight-line code: it contains no loops and no branches of its own.
 *
 * Constants:
 *   - The four words subtracted at the end, read least-significant first
 *     (0xffffffffffffffef, 0xffffffffffffffff, 0xffffffffffffffff, 0x3f),
 *     spell the modulus m = 2^198 - 17.
 *   - 0xf0f0f0f0f0f0f0f1 is the inverse of 17 modulo 2^64; since
 *     m == -17 (mod 2^64), it equals -m^(-1) mod 2^64.
 *
 * Structure: four rounds, one per limb of in1 (in1[0] first). Each round
 *   1. multiplies that limb of in1 by all four limbs of in2,
 *   2. adds the product to the running accumulator (round 1 starts from the
 *      product itself),
 *   3. computes q = (lowest accumulator word * 0xf0f0f0f0f0f0f0f1) mod 2^64
 *      and adds q * m, which makes the lowest accumulator word zero,
 *   4. drops that zero word, i.e. shifts the accumulator right by 64 bits.
 * After the fourth round, m is subtracted once and the subtracted or the
 * unsubtracted value is selected depending on the final borrow.
 *
 * NOTE(review): this is the shape of word-by-word Montgomery multiplication
 * (the file path also says "montgomery64"), so the result is presumably
 * in1 * in2 * 2^(-256) mod m for operands kept in Montgomery form - confirm
 * against the generator / specification.
 *
 * Parameters:
 *   out - receives the 4 result limbs.
 *   in1 - first operand, 4 limbs.
 *   in2 - second operand, 4 limbs.
 * Every read of in1/in2 happens before the first store to out, so out may
 * refer to the same storage as in1 or in2.
 *
 * Helper semantics assumed below (the helpers are defined outside this
 * block - TODO confirm):
 *   _mulx_u64(a, b, &hi)          returns the low 64 bits of a*b, stores the
 *                                 high 64 bits in hi.
 *   _mulx_u64_out_u8(a, b, &hi)   same, but the high half is stored as uint8_t.
 *   _addcarryx_u64(c, a, b, &s)   s = a + b + c (mod 2^64), returns the carry.
 *   _subborrow_u64(c, a, b, &d)   d = a - b - c (mod 2^64), returns the borrow.
 *   cmovznz64(t, z, nz)           returns z when t == 0, otherwise nz.
 *
 * A variable named `_` is a throwaway destination for an output that is
 * deliberately ignored.
 */
static void femul(uint64_t out[4], const uint64_t in1[4], const uint64_t in2[4]) {
  /* Load all input limbs into locals (note the names are not in limb order). */
  { const uint64_t x8 = in1[3];
  { const uint64_t x9 = in1[2];
  { const uint64_t x7 = in1[1];
  { const uint64_t x5 = in1[0];
  { const uint64_t x14 = in2[3];
  { const uint64_t x15 = in2[2];
  { const uint64_t x13 = in2[1];
  { const uint64_t x11 = in2[0];
  /* ---- Round 1: in1[0] * in2 ---- */
  /* Four 64x64 partial products, each as a (low, high) word pair. */
  { uint64_t x18;  uint64_t x17 = _mulx_u64(x5, x11, &x18);
  { uint64_t x21;  uint64_t x20 = _mulx_u64(x5, x13, &x21);
  { uint64_t x24;  uint64_t x23 = _mulx_u64(x5, x15, &x24);
  { uint64_t x27;  uint64_t x26 = _mulx_u64(x5, x14, &x27);
  /* Add each high word to the next low word: product = (x17, x29, x32, x35, x38). */
  { uint64_t x29; uint8_t x30 = _addcarryx_u64(0x0, x18, x20, &x29);
  { uint64_t x32; uint8_t x33 = _addcarryx_u64(x30, x21, x23, &x32);
  { uint64_t x35; uint8_t x36 = _addcarryx_u64(x33, x24, x26, &x35);
  /* Fold the last carry into the top word. A 64-bit by 256-bit product fits in
     five words, so the carry out of this addition is ignored. */
  { uint64_t x38; uint8_t _ = _addcarryx_u64(0x0, x36, x27, &x38);
  /* q = x17 * (-m^(-1)) mod 2^64; only the low half of the product is kept. */
  { uint64_t _;  uint64_t x41 = _mulx_u64(x17, 0xf0f0f0f0f0f0f0f1L, &_);
  /* q * m, one modulus limb at a time (the two middle limbs of m are equal). */
  { uint64_t x45;  uint64_t x44 = _mulx_u64(x41, 0xffffffffffffffefL, &x45);
  { uint64_t x48;  uint64_t x47 = _mulx_u64(x41, 0xffffffffffffffffL, &x48);
  { uint64_t x51;  uint64_t x50 = _mulx_u64(x41, 0xffffffffffffffffL, &x51);
  /* Top limb of m is 0x3f, so the high half of this product is below 0x3f and
     is held in a uint8_t. */
  { uint8_t x54;  uint64_t x53 = _mulx_u64_out_u8(x41, 0x3f, &x54);
  /* Assemble q * m = (x44, x56, x59, x62, x64). */
  { uint64_t x56; uint8_t x57 = _addcarryx_u64(0x0, x45, x47, &x56);
  { uint64_t x59; uint8_t x60 = _addcarryx_u64(x57, x48, x50, &x59);
  { uint64_t x62; uint8_t x63 = _addcarryx_u64(x60, x51, x53, &x62);
  { uint8_t x64 = (x63 + x54);
  /* Add q * m to the product. The lowest word of the sum is zero by the choice
     of q, so it is discarded and only its carry is propagated. */
  { uint64_t _; uint8_t x67 = _addcarryx_u64(0x0, x17, x44, &_);
  { uint64_t x69; uint8_t x70 = _addcarryx_u64(x67, x29, x56, &x69);
  { uint64_t x72; uint8_t x73 = _addcarryx_u64(x70, x32, x59, &x72);
  { uint64_t x75; uint8_t x76 = _addcarryx_u64(x73, x35, x62, &x75);
  /* Accumulator after round 1: (x69, x72, x75, x78) plus top carry x79. */
  { uint64_t x78; uint8_t x79 = _addcarryx_u64(x76, x38, x64, &x78);
  /* ---- Round 2: in1[1] * in2 ---- */
  { uint64_t x82;  uint64_t x81 = _mulx_u64(x7, x11, &x82);
  { uint64_t x85;  uint64_t x84 = _mulx_u64(x7, x13, &x85);
  { uint64_t x88;  uint64_t x87 = _mulx_u64(x7, x15, &x88);
  { uint64_t x91;  uint64_t x90 = _mulx_u64(x7, x14, &x91);
  /* Product = (x81, x93, x96, x99, x102). */
  { uint64_t x93; uint8_t x94 = _addcarryx_u64(0x0, x82, x84, &x93);
  { uint64_t x96; uint8_t x97 = _addcarryx_u64(x94, x85, x87, &x96);
  { uint64_t x99; uint8_t x100 = _addcarryx_u64(x97, x88, x90, &x99);
  { uint64_t x102; uint8_t _ = _addcarryx_u64(0x0, x100, x91, &x102);
  /* Accumulator += product: (x105, x108, x111, x114, x117), carry x118. */
  { uint64_t x105; uint8_t x106 = _addcarryx_u64(0x0, x69, x81, &x105);
  { uint64_t x108; uint8_t x109 = _addcarryx_u64(x106, x72, x93, &x108);
  { uint64_t x111; uint8_t x112 = _addcarryx_u64(x109, x75, x96, &x111);
  { uint64_t x114; uint8_t x115 = _addcarryx_u64(x112, x78, x99, &x114);
  { uint64_t x117; uint8_t x118 = _addcarryx_u64(x115, x79, x102, &x117);
  /* q for this round, derived from the lowest accumulator word x105. */
  { uint64_t _;  uint64_t x120 = _mulx_u64(x105, 0xf0f0f0f0f0f0f0f1L, &_);
  /* q * m = (x123, x135, x138, x141, x143). */
  { uint64_t x124;  uint64_t x123 = _mulx_u64(x120, 0xffffffffffffffefL, &x124);
  { uint64_t x127;  uint64_t x126 = _mulx_u64(x120, 0xffffffffffffffffL, &x127);
  { uint64_t x130;  uint64_t x129 = _mulx_u64(x120, 0xffffffffffffffffL, &x130);
  { uint8_t x133;  uint64_t x132 = _mulx_u64_out_u8(x120, 0x3f, &x133);
  { uint64_t x135; uint8_t x136 = _addcarryx_u64(0x0, x124, x126, &x135);
  { uint64_t x138; uint8_t x139 = _addcarryx_u64(x136, x127, x129, &x138);
  { uint64_t x141; uint8_t x142 = _addcarryx_u64(x139, x130, x132, &x141);
  { uint8_t x143 = (x142 + x133);
  /* Accumulator += q * m; the zero low word is dropped. */
  { uint64_t _; uint8_t x146 = _addcarryx_u64(0x0, x105, x123, &_);
  { uint64_t x148; uint8_t x149 = _addcarryx_u64(x146, x108, x135, &x148);
  { uint64_t x151; uint8_t x152 = _addcarryx_u64(x149, x111, x138, &x151);
  { uint64_t x154; uint8_t x155 = _addcarryx_u64(x152, x114, x141, &x154);
  { uint64_t x157; uint8_t x158 = _addcarryx_u64(x155, x117, x143, &x157);
  /* Top word: carry of the reduction add plus carry of the product add.
     Accumulator after round 2: (x148, x151, x154, x157, x159). */
  { uint8_t x159 = (x158 + x118);
  /* ---- Round 3: in1[2] * in2 ---- */
  { uint64_t x162;  uint64_t x161 = _mulx_u64(x9, x11, &x162);
  { uint64_t x165;  uint64_t x164 = _mulx_u64(x9, x13, &x165);
  { uint64_t x168;  uint64_t x167 = _mulx_u64(x9, x15, &x168);
  { uint64_t x171;  uint64_t x170 = _mulx_u64(x9, x14, &x171);
  /* Product = (x161, x173, x176, x179, x182). */
  { uint64_t x173; uint8_t x174 = _addcarryx_u64(0x0, x162, x164, &x173);
  { uint64_t x176; uint8_t x177 = _addcarryx_u64(x174, x165, x167, &x176);
  { uint64_t x179; uint8_t x180 = _addcarryx_u64(x177, x168, x170, &x179);
  { uint64_t x182; uint8_t _ = _addcarryx_u64(0x0, x180, x171, &x182);
  /* Accumulator += product: (x185, x188, x191, x194, x197), carry x198. */
  { uint64_t x185; uint8_t x186 = _addcarryx_u64(0x0, x148, x161, &x185);
  { uint64_t x188; uint8_t x189 = _addcarryx_u64(x186, x151, x173, &x188);
  { uint64_t x191; uint8_t x192 = _addcarryx_u64(x189, x154, x176, &x191);
  { uint64_t x194; uint8_t x195 = _addcarryx_u64(x192, x157, x179, &x194);
  { uint64_t x197; uint8_t x198 = _addcarryx_u64(x195, x159, x182, &x197);
  /* q for this round, derived from the lowest accumulator word x185. */
  { uint64_t _;  uint64_t x200 = _mulx_u64(x185, 0xf0f0f0f0f0f0f0f1L, &_);
  /* q * m = (x203, x215, x218, x221, x223). */
  { uint64_t x204;  uint64_t x203 = _mulx_u64(x200, 0xffffffffffffffefL, &x204);
  { uint64_t x207;  uint64_t x206 = _mulx_u64(x200, 0xffffffffffffffffL, &x207);
  { uint64_t x210;  uint64_t x209 = _mulx_u64(x200, 0xffffffffffffffffL, &x210);
  { uint8_t x213;  uint64_t x212 = _mulx_u64_out_u8(x200, 0x3f, &x213);
  { uint64_t x215; uint8_t x216 = _addcarryx_u64(0x0, x204, x206, &x215);
  { uint64_t x218; uint8_t x219 = _addcarryx_u64(x216, x207, x209, &x218);
  { uint64_t x221; uint8_t x222 = _addcarryx_u64(x219, x210, x212, &x221);
  { uint8_t x223 = (x222 + x213);
  /* Accumulator += q * m; the zero low word is dropped. */
  { uint64_t _; uint8_t x226 = _addcarryx_u64(0x0, x185, x203, &_);
  { uint64_t x228; uint8_t x229 = _addcarryx_u64(x226, x188, x215, &x228);
  { uint64_t x231; uint8_t x232 = _addcarryx_u64(x229, x191, x218, &x231);
  { uint64_t x234; uint8_t x235 = _addcarryx_u64(x232, x194, x221, &x234);
  { uint64_t x237; uint8_t x238 = _addcarryx_u64(x235, x197, x223, &x237);
  /* Accumulator after round 3: (x228, x231, x234, x237, x239). */
  { uint8_t x239 = (x238 + x198);
  /* ---- Round 4: in1[3] * in2 ---- */
  { uint64_t x242;  uint64_t x241 = _mulx_u64(x8, x11, &x242);
  { uint64_t x245;  uint64_t x244 = _mulx_u64(x8, x13, &x245);
  { uint64_t x248;  uint64_t x247 = _mulx_u64(x8, x15, &x248);
  { uint64_t x251;  uint64_t x250 = _mulx_u64(x8, x14, &x251);
  /* Product = (x241, x253, x256, x259, x262). */
  { uint64_t x253; uint8_t x254 = _addcarryx_u64(0x0, x242, x244, &x253);
  { uint64_t x256; uint8_t x257 = _addcarryx_u64(x254, x245, x247, &x256);
  { uint64_t x259; uint8_t x260 = _addcarryx_u64(x257, x248, x250, &x259);
  { uint64_t x262; uint8_t _ = _addcarryx_u64(0x0, x260, x251, &x262);
  /* Accumulator += product: (x265, x268, x271, x274, x277), carry x278. */
  { uint64_t x265; uint8_t x266 = _addcarryx_u64(0x0, x228, x241, &x265);
  { uint64_t x268; uint8_t x269 = _addcarryx_u64(x266, x231, x253, &x268);
  { uint64_t x271; uint8_t x272 = _addcarryx_u64(x269, x234, x256, &x271);
  { uint64_t x274; uint8_t x275 = _addcarryx_u64(x272, x237, x259, &x274);
  { uint64_t x277; uint8_t x278 = _addcarryx_u64(x275, x239, x262, &x277);
  /* q for this round, derived from the lowest accumulator word x265. */
  { uint64_t _;  uint64_t x280 = _mulx_u64(x265, 0xf0f0f0f0f0f0f0f1L, &_);
  /* q * m = (x283, x295, x298, x301, x303). */
  { uint64_t x284;  uint64_t x283 = _mulx_u64(x280, 0xffffffffffffffefL, &x284);
  { uint64_t x287;  uint64_t x286 = _mulx_u64(x280, 0xffffffffffffffffL, &x287);
  { uint64_t x290;  uint64_t x289 = _mulx_u64(x280, 0xffffffffffffffffL, &x290);
  { uint8_t x293;  uint64_t x292 = _mulx_u64_out_u8(x280, 0x3f, &x293);
  { uint64_t x295; uint8_t x296 = _addcarryx_u64(0x0, x284, x286, &x295);
  { uint64_t x298; uint8_t x299 = _addcarryx_u64(x296, x287, x289, &x298);
  { uint64_t x301; uint8_t x302 = _addcarryx_u64(x299, x290, x292, &x301);
  { uint8_t x303 = (x302 + x293);
  /* Accumulator += q * m; the zero low word is dropped. */
  { uint64_t _; uint8_t x306 = _addcarryx_u64(0x0, x265, x283, &_);
  { uint64_t x308; uint8_t x309 = _addcarryx_u64(x306, x268, x295, &x308);
  { uint64_t x311; uint8_t x312 = _addcarryx_u64(x309, x271, x298, &x311);
  { uint64_t x314; uint8_t x315 = _addcarryx_u64(x312, x274, x301, &x314);
  { uint64_t x317; uint8_t x318 = _addcarryx_u64(x315, x277, x303, &x317);
  /* Accumulator after round 4: (x308, x311, x314, x317, x319). */
  { uint8_t x319 = (x318 + x278);
  /* ---- Final conditional subtraction of m = 2^198 - 17 ---- */
  /* Trial subtraction over all five words; only the final borrow x334 is kept
     from the top word. */
  { uint64_t x321; uint8_t x322 = _subborrow_u64(0x0, x308, 0xffffffffffffffefL, &x321);
  { uint64_t x324; uint8_t x325 = _subborrow_u64(x322, x311, 0xffffffffffffffffL, &x324);
  { uint64_t x327; uint8_t x328 = _subborrow_u64(x325, x314, 0xffffffffffffffffL, &x327);
  { uint64_t x330; uint8_t x331 = _subborrow_u64(x328, x317, 0x3f, &x330);
  { uint64_t _; uint8_t x334 = _subborrow_u64(x331, x319, 0x0, &_);
  /* Per limb, pick between the subtracted word (second argument) and the
     original word (third argument) based on x334.
     NOTE(review): presumably a nonzero borrow keeps the original, i.e. the
     accumulator was already below m - confirm against cmovznz64's definition. */
  { uint64_t x335 = cmovznz64(x334, x330, x317);
  { uint64_t x336 = cmovznz64(x334, x327, x314);
  { uint64_t x337 = cmovznz64(x334, x324, x311);
  { uint64_t x338 = cmovznz64(x334, x321, x308);
  /* Store the result, least-significant limb first. */
  out[0] = x338;
  out[1] = x337;
  out[2] = x336;
  out[3] = x335;
  }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
}